1 /*
2 * Common CPU TLB handling
3 *
4 * Copyright (c) 2003 Fabrice Bellard
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19
20 #include "qemu/osdep.h"
21 #include "qemu/main-loop.h"
22 #include "hw/core/tcg-cpu-ops.h"
23 #include "exec/exec-all.h"
24 #include "exec/memory.h"
25 #include "exec/cpu_ldst.h"
26 #include "exec/cputlb.h"
27 #include "exec/tb-hash.h"
28 #include "exec/memory-internal.h"
29 #include "exec/ram_addr.h"
30 #include "tcg/tcg.h"
31 #include "qemu/error-report.h"
32 #include "exec/log.h"
33 #include "exec/helper-proto.h"
34 #include "qemu/atomic.h"
35 #include "qemu/atomic128.h"
36 #include "exec/translate-all.h"
37 #include "trace/trace-root.h"
38 #include "trace/mem.h"
39 #include "internal.h"
40 #ifdef CONFIG_PLUGIN
41 #include "qemu/plugin-memory.h"
42 #endif
43
44 /* DEBUG defines, enable DEBUG_TLB_LOG to log to the CPU_LOG_MMU target */
45 /* #define DEBUG_TLB */
46 /* #define DEBUG_TLB_LOG */
47
48 #ifdef DEBUG_TLB
49 # define DEBUG_TLB_GATE 1
50 # ifdef DEBUG_TLB_LOG
51 # define DEBUG_TLB_LOG_GATE 1
52 # else
53 # define DEBUG_TLB_LOG_GATE 0
54 # endif
55 #else
56 # define DEBUG_TLB_GATE 0
57 # define DEBUG_TLB_LOG_GATE 0
58 #endif
59
60 #define tlb_debug(fmt, ...) do { \
61 if (DEBUG_TLB_LOG_GATE) { \
62 qemu_log_mask(CPU_LOG_MMU, "%s: " fmt, __func__, \
63 ## __VA_ARGS__); \
64 } else if (DEBUG_TLB_GATE) { \
65 fprintf(stderr, "%s: " fmt, __func__, ## __VA_ARGS__); \
66 } \
67 } while (0)
68
69 #define assert_cpu_is_self(cpu) do { \
70 if (DEBUG_TLB_GATE) { \
71 g_assert(!(cpu)->created || qemu_cpu_is_self(cpu)); \
72 } \
73 } while (0)
74
75 /* run_on_cpu_data.target_ptr should always be big enough for a
76 * target_ulong even on 32 bit builds */
77 QEMU_BUILD_BUG_ON(sizeof(target_ulong) > sizeof(run_on_cpu_data));
78
79 /* We currently can't handle more than 16 bits in the MMUIDX bitmask.
80 */
81 QEMU_BUILD_BUG_ON(NB_MMU_MODES > 16);
82 #define ALL_MMUIDX_BITS ((1 << NB_MMU_MODES) - 1)
83
84 static inline size_t tlb_n_entries(CPUTLBDescFast *fast)
85 {
86 return (fast->mask >> CPU_TLB_ENTRY_BITS) + 1;
87 }
88
89 static inline size_t sizeof_tlb(CPUTLBDescFast *fast)
90 {
91 return fast->mask + (1 << CPU_TLB_ENTRY_BITS);
92 }
93
94 static void tlb_window_reset(CPUTLBDesc *desc, int64_t ns,
95 size_t max_entries)
96 {
97 desc->window_begin_ns = ns;
98 desc->window_max_entries = max_entries;
99 }
100
101 static void tb_jmp_cache_clear_page(CPUState *cpu, target_ulong page_addr)
102 {
103 unsigned int i, i0 = tb_jmp_cache_hash_page(page_addr);
104
105 for (i = 0; i < TB_JMP_PAGE_SIZE; i++) {
106 qatomic_set(&cpu->tb_jmp_cache[i0 + i], NULL);
107 }
108 }
109
110 static void tb_flush_jmp_cache(CPUState *cpu, target_ulong addr)
111 {
112 /* Discard jump cache entries for any tb which might potentially
113 overlap the flushed page. */
114 tb_jmp_cache_clear_page(cpu, addr - TARGET_PAGE_SIZE);
115 tb_jmp_cache_clear_page(cpu, addr);
116 }
117
118 /**
119 * tlb_mmu_resize_locked() - perform TLB resize bookkeeping; resize if necessary
120 * @desc: The CPUTLBDesc portion of the TLB
121 * @fast: The CPUTLBDescFast portion of the same TLB
122 *
123  * Called with tlb_lock held.
124 *
125 * We have two main constraints when resizing a TLB: (1) we only resize it
126 * on a TLB flush (otherwise we'd have to take a perf hit by either rehashing
127 * the array or unnecessarily flushing it), which means we do not control how
128 * frequently the resizing can occur; (2) we don't have access to the guest's
129 * future scheduling decisions, and therefore have to decide the magnitude of
130 * the resize based on past observations.
131 *
132 * In general, a memory-hungry process can benefit greatly from an appropriately
133 * sized TLB, since a guest TLB miss is very expensive. This doesn't mean that
134 * we just have to make the TLB as large as possible; while an oversized TLB
135 * results in minimal TLB miss rates, it also takes longer to be flushed
136 * (flushes can be _very_ frequent), and the reduced locality can also hurt
137 * performance.
138 *
139 * To achieve near-optimal performance for all kinds of workloads, we:
140 *
141 * 1. Aggressively increase the size of the TLB when the use rate of the
142 * TLB being flushed is high, since it is likely that in the near future this
143 * memory-hungry process will execute again, and its memory hungriness will
144 * probably be similar.
145 *
146 * 2. Slowly reduce the size of the TLB as the use rate declines over a
147 * reasonably large time window. The rationale is that if in such a time window
148 * we have not observed a high TLB use rate, it is likely that we won't observe
149 * it in the near future. In that case, once a time window expires we downsize
150 * the TLB to match the maximum use rate observed in the window.
151 *
152 * 3. Try to keep the maximum use rate in a time window in the 30-70% range,
153 * since in that range performance is likely near-optimal. Recall that the TLB
154 * is direct mapped, so we want the use rate to be low (or at least not too
155 * high), since otherwise we are likely to have a significant amount of
156 * conflict misses.
157 */
158 static void tlb_mmu_resize_locked(CPUTLBDesc *desc, CPUTLBDescFast *fast,
159 int64_t now)
160 {
161 size_t old_size = tlb_n_entries(fast);
162 size_t rate;
163 size_t new_size = old_size;
164 int64_t window_len_ms = 100;
165 int64_t window_len_ns = window_len_ms * 1000 * 1000;
166 bool window_expired = now > desc->window_begin_ns + window_len_ns;
167
168 if (desc->n_used_entries > desc->window_max_entries) {
169 desc->window_max_entries = desc->n_used_entries;
170 }
171 rate = desc->window_max_entries * 100 / old_size;
172
173 if (rate > 70) {
174 new_size = MIN(old_size << 1, 1 << CPU_TLB_DYN_MAX_BITS);
175 } else if (rate < 30 && window_expired) {
176 size_t ceil = pow2ceil(desc->window_max_entries);
177 size_t expected_rate = desc->window_max_entries * 100 / ceil;
178
179 /*
180 * Avoid undersizing when the max number of entries seen is just below
181 * a pow2. For instance, if max_entries == 1025, the expected use rate
182 * would be 1025/2048==50%. However, if max_entries == 1023, we'd get
183 * 1023/1024==99.9% use rate, so we'd likely end up doubling the size
184 * later. Thus, make sure that the expected use rate remains below 70%.
185  * (And since we double the size, the lowest rate we would then expect
186  * is 35%, which is still in the 30-70% range where we consider the
187  * size to be appropriate.)
188 */
189 if (expected_rate > 70) {
190 ceil *= 2;
191 }
192 new_size = MAX(ceil, 1 << CPU_TLB_DYN_MIN_BITS);
193 }
194
195 if (new_size == old_size) {
196 if (window_expired) {
197 tlb_window_reset(desc, now, desc->n_used_entries);
198 }
199 return;
200 }
201
202 g_free(fast->table);
203 g_free(desc->iotlb);
204
205 tlb_window_reset(desc, now, 0);
206 /* desc->n_used_entries is cleared by the caller */
207 fast->mask = (new_size - 1) << CPU_TLB_ENTRY_BITS;
208 fast->table = g_try_new(CPUTLBEntry, new_size);
209 desc->iotlb = g_try_new(CPUIOTLBEntry, new_size);
210
211 /*
212 * If the allocations fail, try smaller sizes. We just freed some
213 * memory, so going back to half of new_size has a good chance of working.
214 * Increased memory pressure elsewhere in the system might cause the
215 * allocations to fail though, so we progressively reduce the allocation
216 * size, aborting if we cannot even allocate the smallest TLB we support.
217 */
218 while (fast->table == NULL || desc->iotlb == NULL) {
219 if (new_size == (1 << CPU_TLB_DYN_MIN_BITS)) {
220 error_report("%s: %s", __func__, strerror(errno));
221 abort();
222 }
223 new_size = MAX(new_size >> 1, 1 << CPU_TLB_DYN_MIN_BITS);
224 fast->mask = (new_size - 1) << CPU_TLB_ENTRY_BITS;
225
226 g_free(fast->table);
227 g_free(desc->iotlb);
228 fast->table = g_try_new(CPUTLBEntry, new_size);
229 desc->iotlb = g_try_new(CPUIOTLBEntry, new_size);
230 }
231 }
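
/*
 * Illustrative walk-through of the sizing policy above (the numbers are
 * made up for the example; the real bounds come from CPU_TLB_DYN_MIN_BITS
 * and CPU_TLB_DYN_MAX_BITS):
 *
 *   - old_size = 1024 and window_max_entries = 768:
 *       rate = 768 * 100 / 1024 = 75% > 70%, so the TLB doubles to 2048.
 *   - later, old_size = 2048, the window has expired and
 *     window_max_entries = 200:
 *       rate = 200 * 100 / 2048 = 9% < 30%
 *       ceil = pow2ceil(200) = 256, expected_rate = 200 * 100 / 256 = 78%
 *       78% > 70%, so ceil is doubled to 512 and the TLB shrinks to 512.
 */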
232
233 static void tlb_mmu_flush_locked(CPUTLBDesc *desc, CPUTLBDescFast *fast)
234 {
235 desc->n_used_entries = 0;
236 desc->large_page_addr = -1;
237 desc->large_page_mask = -1;
238 desc->vindex = 0;
239 memset(fast->table, -1, sizeof_tlb(fast));
240 memset(desc->vtable, -1, sizeof(desc->vtable));
241 }
242
243 static void tlb_flush_one_mmuidx_locked(CPUArchState *env, int mmu_idx,
244 int64_t now)
245 {
246 CPUTLBDesc *desc = &env_tlb(env)->d[mmu_idx];
247 CPUTLBDescFast *fast = &env_tlb(env)->f[mmu_idx];
248
249 tlb_mmu_resize_locked(desc, fast, now);
250 tlb_mmu_flush_locked(desc, fast);
251 }
252
253 static void tlb_mmu_init(CPUTLBDesc *desc, CPUTLBDescFast *fast, int64_t now)
254 {
255 size_t n_entries = 1 << CPU_TLB_DYN_DEFAULT_BITS;
256
257 tlb_window_reset(desc, now, 0);
258 desc->n_used_entries = 0;
259 fast->mask = (n_entries - 1) << CPU_TLB_ENTRY_BITS;
260 fast->table = g_new(CPUTLBEntry, n_entries);
261 desc->iotlb = g_new(CPUIOTLBEntry, n_entries);
262 tlb_mmu_flush_locked(desc, fast);
263 }
264
265 static inline void tlb_n_used_entries_inc(CPUArchState *env, uintptr_t mmu_idx)
266 {
267 env_tlb(env)->d[mmu_idx].n_used_entries++;
268 }
269
270 static inline void tlb_n_used_entries_dec(CPUArchState *env, uintptr_t mmu_idx)
271 {
272 env_tlb(env)->d[mmu_idx].n_used_entries--;
273 }
274
275 void tlb_init(CPUState *cpu)
276 {
277 CPUArchState *env = cpu->env_ptr;
278 int64_t now = get_clock_realtime();
279 int i;
280
281 qemu_spin_init(&env_tlb(env)->c.lock);
282
283 /* All tlbs are initialized flushed. */
284 env_tlb(env)->c.dirty = 0;
285
286 for (i = 0; i < NB_MMU_MODES; i++) {
287 tlb_mmu_init(&env_tlb(env)->d[i], &env_tlb(env)->f[i], now);
288 }
289 }
290
291 void tlb_destroy(CPUState *cpu)
292 {
293 CPUArchState *env = cpu->env_ptr;
294 int i;
295
296 qemu_spin_destroy(&env_tlb(env)->c.lock);
297 for (i = 0; i < NB_MMU_MODES; i++) {
298 CPUTLBDesc *desc = &env_tlb(env)->d[i];
299 CPUTLBDescFast *fast = &env_tlb(env)->f[i];
300
301 g_free(fast->table);
302 g_free(desc->iotlb);
303 }
304 }
305
306 /* flush_all_helper: run fn across all cpus
307 *
308  * The src cpu itself is skipped; callers that need a synchronisation
309  * point additionally queue the src cpu's helper as "safe" work (see the
310  * *_synced variants below) so that all queued work is finished before
311  * execution starts again.
312 */
313 static void flush_all_helper(CPUState *src, run_on_cpu_func fn,
314 run_on_cpu_data d)
315 {
316 CPUState *cpu;
317
318 CPU_FOREACH(cpu) {
319 if (cpu != src) {
320 async_run_on_cpu(cpu, fn, d);
321 }
322 }
323 }
324
325 void tlb_flush_counts(size_t *pfull, size_t *ppart, size_t *pelide)
326 {
327 CPUState *cpu;
328 size_t full = 0, part = 0, elide = 0;
329
330 CPU_FOREACH(cpu) {
331 CPUArchState *env = cpu->env_ptr;
332
333 full += qatomic_read(&env_tlb(env)->c.full_flush_count);
334 part += qatomic_read(&env_tlb(env)->c.part_flush_count);
335 elide += qatomic_read(&env_tlb(env)->c.elide_flush_count);
336 }
337 *pfull = full;
338 *ppart = part;
339 *pelide = elide;
340 }
341
342 static void tlb_flush_by_mmuidx_async_work(CPUState *cpu, run_on_cpu_data data)
343 {
344 CPUArchState *env = cpu->env_ptr;
345 uint16_t asked = data.host_int;
346 uint16_t all_dirty, work, to_clean;
347 int64_t now = get_clock_realtime();
348
349 assert_cpu_is_self(cpu);
350
351 tlb_debug("mmu_idx:0x%04" PRIx16 "\n", asked);
352
353 qemu_spin_lock(&env_tlb(env)->c.lock);
354
355 all_dirty = env_tlb(env)->c.dirty;
356 to_clean = asked & all_dirty;
357 all_dirty &= ~to_clean;
358 env_tlb(env)->c.dirty = all_dirty;
359
360 for (work = to_clean; work != 0; work &= work - 1) {
361 int mmu_idx = ctz32(work);
362 tlb_flush_one_mmuidx_locked(env, mmu_idx, now);
363 }
364
365 qemu_spin_unlock(&env_tlb(env)->c.lock);
366
367 cpu_tb_jmp_cache_clear(cpu);
368
369 if (to_clean == ALL_MMUIDX_BITS) {
370 qatomic_set(&env_tlb(env)->c.full_flush_count,
371 env_tlb(env)->c.full_flush_count + 1);
372 } else {
373 qatomic_set(&env_tlb(env)->c.part_flush_count,
374 env_tlb(env)->c.part_flush_count + ctpop16(to_clean));
375 if (to_clean != asked) {
376 qatomic_set(&env_tlb(env)->c.elide_flush_count,
377 env_tlb(env)->c.elide_flush_count +
378 ctpop16(asked & ~to_clean));
379 }
380 }
381 }
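
/*
 * A note on the bitmask loop above: "work &= work - 1" clears the lowest
 * set bit, so together with ctz32() each dirty mmu_idx is visited exactly
 * once. For example (hypothetical value), to_clean = 0x0a flushes
 * mmu_idx 1 (work becomes 0x08), then mmu_idx 3 (work becomes 0), and the
 * loop terminates.
 */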
382
383 void tlb_flush_by_mmuidx(CPUState *cpu, uint16_t idxmap)
384 {
385 tlb_debug("mmu_idx: 0x%" PRIx16 "\n", idxmap);
386
387 if (cpu->created && !qemu_cpu_is_self(cpu)) {
388 async_run_on_cpu(cpu, tlb_flush_by_mmuidx_async_work,
389 RUN_ON_CPU_HOST_INT(idxmap));
390 } else {
391 tlb_flush_by_mmuidx_async_work(cpu, RUN_ON_CPU_HOST_INT(idxmap));
392 }
393 }
394
395 void tlb_flush(CPUState *cpu)
396 {
397 tlb_flush_by_mmuidx(cpu, ALL_MMUIDX_BITS);
398 }
399
400 void tlb_flush_by_mmuidx_all_cpus(CPUState *src_cpu, uint16_t idxmap)
401 {
402 const run_on_cpu_func fn = tlb_flush_by_mmuidx_async_work;
403
404 tlb_debug("mmu_idx: 0x%"PRIx16"\n", idxmap);
405
406 flush_all_helper(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap));
407 fn(src_cpu, RUN_ON_CPU_HOST_INT(idxmap));
408 }
409
410 void tlb_flush_all_cpus(CPUState *src_cpu)
411 {
412 tlb_flush_by_mmuidx_all_cpus(src_cpu, ALL_MMUIDX_BITS);
413 }
414
415 void tlb_flush_by_mmuidx_all_cpus_synced(CPUState *src_cpu, uint16_t idxmap)
416 {
417 const run_on_cpu_func fn = tlb_flush_by_mmuidx_async_work;
418
419 tlb_debug("mmu_idx: 0x%"PRIx16"\n", idxmap);
420
421 flush_all_helper(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap));
422 async_safe_run_on_cpu(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap));
423 }
424
425 void tlb_flush_all_cpus_synced(CPUState *src_cpu)
426 {
427 tlb_flush_by_mmuidx_all_cpus_synced(src_cpu, ALL_MMUIDX_BITS);
428 }
429
430 static bool tlb_hit_page_mask_anyprot(CPUTLBEntry *tlb_entry,
431 target_ulong page, target_ulong mask)
432 {
433 page &= mask;
434 mask &= TARGET_PAGE_MASK | TLB_INVALID_MASK;
435
436 return (page == (tlb_entry->addr_read & mask) ||
437 page == (tlb_addr_write(tlb_entry) & mask) ||
438 page == (tlb_entry->addr_code & mask));
439 }
440
441 static inline bool tlb_hit_page_anyprot(CPUTLBEntry *tlb_entry,
442 target_ulong page)
443 {
444 return tlb_hit_page_mask_anyprot(tlb_entry, page, -1);
445 }
446
447 /**
448 * tlb_entry_is_empty - return true if the entry is not in use
449 * @te: pointer to CPUTLBEntry
450 */
451 static inline bool tlb_entry_is_empty(const CPUTLBEntry *te)
452 {
453 return te->addr_read == -1 && te->addr_write == -1 && te->addr_code == -1;
454 }
455
456 /* Called with tlb_c.lock held */
457 static bool tlb_flush_entry_mask_locked(CPUTLBEntry *tlb_entry,
458 target_ulong page,
459 target_ulong mask)
460 {
461 if (tlb_hit_page_mask_anyprot(tlb_entry, page, mask)) {
462 memset(tlb_entry, -1, sizeof(*tlb_entry));
463 return true;
464 }
465 return false;
466 }
467
468 static inline bool tlb_flush_entry_locked(CPUTLBEntry *tlb_entry,
469 target_ulong page)
470 {
471 return tlb_flush_entry_mask_locked(tlb_entry, page, -1);
472 }
473
474 /* Called with tlb_c.lock held */
475 static void tlb_flush_vtlb_page_mask_locked(CPUArchState *env, int mmu_idx,
476 target_ulong page,
477 target_ulong mask)
478 {
479 CPUTLBDesc *d = &env_tlb(env)->d[mmu_idx];
480 int k;
481
482 assert_cpu_is_self(env_cpu(env));
483 for (k = 0; k < CPU_VTLB_SIZE; k++) {
484 if (tlb_flush_entry_mask_locked(&d->vtable[k], page, mask)) {
485 tlb_n_used_entries_dec(env, mmu_idx);
486 }
487 }
488 }
489
490 static inline void tlb_flush_vtlb_page_locked(CPUArchState *env, int mmu_idx,
491 target_ulong page)
492 {
493 tlb_flush_vtlb_page_mask_locked(env, mmu_idx, page, -1);
494 }
495
496 static void tlb_flush_page_locked(CPUArchState *env, int midx,
497 target_ulong page)
498 {
499 target_ulong lp_addr = env_tlb(env)->d[midx].large_page_addr;
500 target_ulong lp_mask = env_tlb(env)->d[midx].large_page_mask;
501
502 /* Check if we need to flush due to large pages. */
503 if ((page & lp_mask) == lp_addr) {
504 tlb_debug("forcing full flush midx %d ("
505 TARGET_FMT_lx "/" TARGET_FMT_lx ")\n",
506 midx, lp_addr, lp_mask);
507 tlb_flush_one_mmuidx_locked(env, midx, get_clock_realtime());
508 } else {
509 if (tlb_flush_entry_locked(tlb_entry(env, midx, page), page)) {
510 tlb_n_used_entries_dec(env, midx);
511 }
512 tlb_flush_vtlb_page_locked(env, midx, page);
513 }
514 }
515
516 /**
517 * tlb_flush_page_by_mmuidx_async_0:
518 * @cpu: cpu on which to flush
519 * @addr: page of virtual address to flush
520 * @idxmap: set of mmu_idx to flush
521 *
522 * Helper for tlb_flush_page_by_mmuidx and friends, flush one page
523  * at @addr from the TLBs of @cpu that are indicated by @idxmap.
524 */
525 static void tlb_flush_page_by_mmuidx_async_0(CPUState *cpu,
526 target_ulong addr,
527 uint16_t idxmap)
528 {
529 CPUArchState *env = cpu->env_ptr;
530 int mmu_idx;
531
532 assert_cpu_is_self(cpu);
533
534 tlb_debug("page addr:" TARGET_FMT_lx " mmu_map:0x%x\n", addr, idxmap);
535
536 qemu_spin_lock(&env_tlb(env)->c.lock);
537 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
538 if ((idxmap >> mmu_idx) & 1) {
539 tlb_flush_page_locked(env, mmu_idx, addr);
540 }
541 }
542 qemu_spin_unlock(&env_tlb(env)->c.lock);
543
544 tb_flush_jmp_cache(cpu, addr);
545 }
546
547 /**
548 * tlb_flush_page_by_mmuidx_async_1:
549 * @cpu: cpu on which to flush
550 * @data: encoded addr + idxmap
551 *
552 * Helper for tlb_flush_page_by_mmuidx and friends, called through
553 * async_run_on_cpu. The idxmap parameter is encoded in the page
554 * offset of the target_ptr field. This limits the set of mmu_idx
555 * that can be passed via this method.
556 */
557 static void tlb_flush_page_by_mmuidx_async_1(CPUState *cpu,
558 run_on_cpu_data data)
559 {
560 target_ulong addr_and_idxmap = (target_ulong) data.target_ptr;
561 target_ulong addr = addr_and_idxmap & TARGET_PAGE_MASK;
562 uint16_t idxmap = addr_and_idxmap & ~TARGET_PAGE_MASK;
563
564 tlb_flush_page_by_mmuidx_async_0(cpu, addr, idxmap);
565 }
566
567 typedef struct {
568 target_ulong addr;
569 uint16_t idxmap;
570 } TLBFlushPageByMMUIdxData;
571
572 /**
573 * tlb_flush_page_by_mmuidx_async_2:
574 * @cpu: cpu on which to flush
575 * @data: allocated addr + idxmap
576 *
577 * Helper for tlb_flush_page_by_mmuidx and friends, called through
578 * async_run_on_cpu. The addr+idxmap parameters are stored in a
579 * TLBFlushPageByMMUIdxData structure that has been allocated
580 * specifically for this helper. Free the structure when done.
581 */
582 static void tlb_flush_page_by_mmuidx_async_2(CPUState *cpu,
583 run_on_cpu_data data)
584 {
585 TLBFlushPageByMMUIdxData *d = data.host_ptr;
586
587 tlb_flush_page_by_mmuidx_async_0(cpu, d->addr, d->idxmap);
588 g_free(d);
589 }
590
591 void tlb_flush_page_by_mmuidx(CPUState *cpu, target_ulong addr, uint16_t idxmap)
592 {
593 tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%" PRIx16 "\n", addr, idxmap);
594
595 /* This should already be page aligned */
596 addr &= TARGET_PAGE_MASK;
597
598 if (qemu_cpu_is_self(cpu)) {
599 tlb_flush_page_by_mmuidx_async_0(cpu, addr, idxmap);
600 } else if (idxmap < TARGET_PAGE_SIZE) {
601 /*
602 * Most targets have only a few mmu_idx. In the case where
603 * we can stuff idxmap into the low TARGET_PAGE_BITS, avoid
604 * allocating memory for this operation.
605 */
606 async_run_on_cpu(cpu, tlb_flush_page_by_mmuidx_async_1,
607 RUN_ON_CPU_TARGET_PTR(addr | idxmap));
608 } else {
609 TLBFlushPageByMMUIdxData *d = g_new(TLBFlushPageByMMUIdxData, 1);
610
611 /* Otherwise allocate a structure, freed by the worker. */
612 d->addr = addr;
613 d->idxmap = idxmap;
614 async_run_on_cpu(cpu, tlb_flush_page_by_mmuidx_async_2,
615 RUN_ON_CPU_HOST_PTR(d));
616 }
617 }
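
/*
 * Sketch of the "stuff idxmap into the page offset" encoding used above,
 * assuming 4 KiB pages (TARGET_PAGE_SIZE is target dependent):
 *
 *   addr    = 0x7f001000   (page aligned, low 12 bits are zero)
 *   idxmap  = 0x0005
 *   encoded = addr | idxmap = 0x7f001005
 *
 * tlb_flush_page_by_mmuidx_async_1 then recovers
 *   addr   = encoded & TARGET_PAGE_MASK  = 0x7f001000
 *   idxmap = encoded & ~TARGET_PAGE_MASK = 0x0005
 */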
618
619 void tlb_flush_page(CPUState *cpu, target_ulong addr)
620 {
621 tlb_flush_page_by_mmuidx(cpu, addr, ALL_MMUIDX_BITS);
622 }
623
624 void tlb_flush_page_by_mmuidx_all_cpus(CPUState *src_cpu, target_ulong addr,
625 uint16_t idxmap)
626 {
627 tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%"PRIx16"\n", addr, idxmap);
628
629 /* This should already be page aligned */
630 addr &= TARGET_PAGE_MASK;
631
632 /*
633 * Allocate memory to hold addr+idxmap only when needed.
634 * See tlb_flush_page_by_mmuidx for details.
635 */
636 if (idxmap < TARGET_PAGE_SIZE) {
637 flush_all_helper(src_cpu, tlb_flush_page_by_mmuidx_async_1,
638 RUN_ON_CPU_TARGET_PTR(addr | idxmap));
639 } else {
640 CPUState *dst_cpu;
641
642 /* Allocate a separate data block for each destination cpu. */
643 CPU_FOREACH(dst_cpu) {
644 if (dst_cpu != src_cpu) {
645 TLBFlushPageByMMUIdxData *d
646 = g_new(TLBFlushPageByMMUIdxData, 1);
647
648 d->addr = addr;
649 d->idxmap = idxmap;
650 async_run_on_cpu(dst_cpu, tlb_flush_page_by_mmuidx_async_2,
651 RUN_ON_CPU_HOST_PTR(d));
652 }
653 }
654 }
655
656 tlb_flush_page_by_mmuidx_async_0(src_cpu, addr, idxmap);
657 }
658
659 void tlb_flush_page_all_cpus(CPUState *src, target_ulong addr)
660 {
661 tlb_flush_page_by_mmuidx_all_cpus(src, addr, ALL_MMUIDX_BITS);
662 }
663
664 void tlb_flush_page_by_mmuidx_all_cpus_synced(CPUState *src_cpu,
665 target_ulong addr,
666 uint16_t idxmap)
667 {
668 tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%"PRIx16"\n", addr, idxmap);
669
670 /* This should already be page aligned */
671 addr &= TARGET_PAGE_MASK;
672
673 /*
674 * Allocate memory to hold addr+idxmap only when needed.
675 * See tlb_flush_page_by_mmuidx for details.
676 */
677 if (idxmap < TARGET_PAGE_SIZE) {
678 flush_all_helper(src_cpu, tlb_flush_page_by_mmuidx_async_1,
679 RUN_ON_CPU_TARGET_PTR(addr | idxmap));
680 async_safe_run_on_cpu(src_cpu, tlb_flush_page_by_mmuidx_async_1,
681 RUN_ON_CPU_TARGET_PTR(addr | idxmap));
682 } else {
683 CPUState *dst_cpu;
684 TLBFlushPageByMMUIdxData *d;
685
686 /* Allocate a separate data block for each destination cpu. */
687 CPU_FOREACH(dst_cpu) {
688 if (dst_cpu != src_cpu) {
689 d = g_new(TLBFlushPageByMMUIdxData, 1);
690 d->addr = addr;
691 d->idxmap = idxmap;
692 async_run_on_cpu(dst_cpu, tlb_flush_page_by_mmuidx_async_2,
693 RUN_ON_CPU_HOST_PTR(d));
694 }
695 }
696
697 d = g_new(TLBFlushPageByMMUIdxData, 1);
698 d->addr = addr;
699 d->idxmap = idxmap;
700 async_safe_run_on_cpu(src_cpu, tlb_flush_page_by_mmuidx_async_2,
701 RUN_ON_CPU_HOST_PTR(d));
702 }
703 }
704
705 void tlb_flush_page_all_cpus_synced(CPUState *src, target_ulong addr)
706 {
707 tlb_flush_page_by_mmuidx_all_cpus_synced(src, addr, ALL_MMUIDX_BITS);
708 }
709
710 static void tlb_flush_page_bits_locked(CPUArchState *env, int midx,
711 target_ulong page, unsigned bits)
712 {
713 CPUTLBDesc *d = &env_tlb(env)->d[midx];
714 CPUTLBDescFast *f = &env_tlb(env)->f[midx];
715 target_ulong mask = MAKE_64BIT_MASK(0, bits);
716
717 /*
718 * If @bits is smaller than the tlb size, there may be multiple entries
719 * within the TLB; otherwise all addresses that match under @mask hit
720 * the same TLB entry.
721 *
722 * TODO: Perhaps allow bits to be a few bits less than the size.
723 * For now, just flush the entire TLB.
724 */
725 if (mask < f->mask) {
726 tlb_debug("forcing full flush midx %d ("
727 TARGET_FMT_lx "/" TARGET_FMT_lx ")\n",
728 midx, page, mask);
729 tlb_flush_one_mmuidx_locked(env, midx, get_clock_realtime());
730 return;
731 }
732
733 /* Check if we need to flush due to large pages. */
734 if ((page & d->large_page_mask) == d->large_page_addr) {
735 tlb_debug("forcing full flush midx %d ("
736 TARGET_FMT_lx "/" TARGET_FMT_lx ")\n",
737 midx, d->large_page_addr, d->large_page_mask);
738 tlb_flush_one_mmuidx_locked(env, midx, get_clock_realtime());
739 return;
740 }
741
742 if (tlb_flush_entry_mask_locked(tlb_entry(env, midx, page), page, mask)) {
743 tlb_n_used_entries_dec(env, midx);
744 }
745 tlb_flush_vtlb_page_mask_locked(env, midx, page, mask);
746 }
747
748 typedef struct {
749 target_ulong addr;
750 uint16_t idxmap;
751 uint16_t bits;
752 } TLBFlushPageBitsByMMUIdxData;
753
754 static void
755 tlb_flush_page_bits_by_mmuidx_async_0(CPUState *cpu,
756 TLBFlushPageBitsByMMUIdxData d)
757 {
758 CPUArchState *env = cpu->env_ptr;
759 int mmu_idx;
760
761 assert_cpu_is_self(cpu);
762
763 tlb_debug("page addr:" TARGET_FMT_lx "/%u mmu_map:0x%x\n",
764 d.addr, d.bits, d.idxmap);
765
766 qemu_spin_lock(&env_tlb(env)->c.lock);
767 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
768 if ((d.idxmap >> mmu_idx) & 1) {
769 tlb_flush_page_bits_locked(env, mmu_idx, d.addr, d.bits);
770 }
771 }
772 qemu_spin_unlock(&env_tlb(env)->c.lock);
773
774 tb_flush_jmp_cache(cpu, d.addr);
775 }
776
777 static bool encode_pbm_to_runon(run_on_cpu_data *out,
778 TLBFlushPageBitsByMMUIdxData d)
779 {
780     /* We need 6 bits to hold @bits, which can be up to 63. */
781 if (d.idxmap <= MAKE_64BIT_MASK(0, TARGET_PAGE_BITS - 6)) {
782 *out = RUN_ON_CPU_TARGET_PTR(d.addr | (d.idxmap << 6) | d.bits);
783 return true;
784 }
785 return false;
786 }
787
788 static TLBFlushPageBitsByMMUIdxData
789 decode_runon_to_pbm(run_on_cpu_data data)
790 {
791 target_ulong addr_map_bits = (target_ulong) data.target_ptr;
792 return (TLBFlushPageBitsByMMUIdxData){
793 .addr = addr_map_bits & TARGET_PAGE_MASK,
794 .idxmap = (addr_map_bits & ~TARGET_PAGE_MASK) >> 6,
795 .bits = addr_map_bits & 0x3f
796 };
797 }
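
/*
 * Worked example of the encode/decode pair above, again assuming 4 KiB
 * pages (TARGET_PAGE_BITS == 12): the low 6 bits carry @bits, the next
 * TARGET_PAGE_BITS - 6 bits carry @idxmap, and the rest is the page
 * address.
 *
 *   addr = 0x7f001000, idxmap = 0x3, bits = 40 (0x28)
 *   encoded = 0x7f001000 | (0x3 << 6) | 0x28 = 0x7f0010e8
 *
 * decode_runon_to_pbm() recovers addr = 0x7f001000,
 * idxmap = 0xe8 >> 6 = 0x3 and bits = 0xe8 & 0x3f = 40.
 */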
798
799 static void tlb_flush_page_bits_by_mmuidx_async_1(CPUState *cpu,
800 run_on_cpu_data runon)
801 {
802 tlb_flush_page_bits_by_mmuidx_async_0(cpu, decode_runon_to_pbm(runon));
803 }
804
805 static void tlb_flush_page_bits_by_mmuidx_async_2(CPUState *cpu,
806 run_on_cpu_data data)
807 {
808 TLBFlushPageBitsByMMUIdxData *d = data.host_ptr;
809 tlb_flush_page_bits_by_mmuidx_async_0(cpu, *d);
810 g_free(d);
811 }
812
813 void tlb_flush_page_bits_by_mmuidx(CPUState *cpu, target_ulong addr,
814 uint16_t idxmap, unsigned bits)
815 {
816 TLBFlushPageBitsByMMUIdxData d;
817 run_on_cpu_data runon;
818
819 /* If all bits are significant, this devolves to tlb_flush_page. */
820 if (bits >= TARGET_LONG_BITS) {
821 tlb_flush_page_by_mmuidx(cpu, addr, idxmap);
822 return;
823 }
824 /* If no page bits are significant, this devolves to tlb_flush. */
825 if (bits < TARGET_PAGE_BITS) {
826 tlb_flush_by_mmuidx(cpu, idxmap);
827 return;
828 }
829
830 /* This should already be page aligned */
831 d.addr = addr & TARGET_PAGE_MASK;
832 d.idxmap = idxmap;
833 d.bits = bits;
834
835 if (qemu_cpu_is_self(cpu)) {
836 tlb_flush_page_bits_by_mmuidx_async_0(cpu, d);
837 } else if (encode_pbm_to_runon(&runon, d)) {
838 async_run_on_cpu(cpu, tlb_flush_page_bits_by_mmuidx_async_1, runon);
839 } else {
840 TLBFlushPageBitsByMMUIdxData *p
841 = g_new(TLBFlushPageBitsByMMUIdxData, 1);
842
843 /* Otherwise allocate a structure, freed by the worker. */
844 *p = d;
845 async_run_on_cpu(cpu, tlb_flush_page_bits_by_mmuidx_async_2,
846 RUN_ON_CPU_HOST_PTR(p));
847 }
848 }
849
850 void tlb_flush_page_bits_by_mmuidx_all_cpus(CPUState *src_cpu,
851 target_ulong addr,
852 uint16_t idxmap,
853 unsigned bits)
854 {
855 TLBFlushPageBitsByMMUIdxData d;
856 run_on_cpu_data runon;
857
858 /* If all bits are significant, this devolves to tlb_flush_page. */
859 if (bits >= TARGET_LONG_BITS) {
860 tlb_flush_page_by_mmuidx_all_cpus(src_cpu, addr, idxmap);
861 return;
862 }
863 /* If no page bits are significant, this devolves to tlb_flush. */
864 if (bits < TARGET_PAGE_BITS) {
865 tlb_flush_by_mmuidx_all_cpus(src_cpu, idxmap);
866 return;
867 }
868
869 /* This should already be page aligned */
870 d.addr = addr & TARGET_PAGE_MASK;
871 d.idxmap = idxmap;
872 d.bits = bits;
873
874 if (encode_pbm_to_runon(&runon, d)) {
875 flush_all_helper(src_cpu, tlb_flush_page_bits_by_mmuidx_async_1, runon);
876 } else {
877 CPUState *dst_cpu;
878 TLBFlushPageBitsByMMUIdxData *p;
879
880 /* Allocate a separate data block for each destination cpu. */
881 CPU_FOREACH(dst_cpu) {
882 if (dst_cpu != src_cpu) {
883 p = g_new(TLBFlushPageBitsByMMUIdxData, 1);
884 *p = d;
885 async_run_on_cpu(dst_cpu,
886 tlb_flush_page_bits_by_mmuidx_async_2,
887 RUN_ON_CPU_HOST_PTR(p));
888 }
889 }
890 }
891
892 tlb_flush_page_bits_by_mmuidx_async_0(src_cpu, d);
893 }
894
895 void tlb_flush_page_bits_by_mmuidx_all_cpus_synced(CPUState *src_cpu,
896 target_ulong addr,
897 uint16_t idxmap,
898 unsigned bits)
899 {
900 TLBFlushPageBitsByMMUIdxData d;
901 run_on_cpu_data runon;
902
903 /* If all bits are significant, this devolves to tlb_flush_page. */
904 if (bits >= TARGET_LONG_BITS) {
905 tlb_flush_page_by_mmuidx_all_cpus_synced(src_cpu, addr, idxmap);
906 return;
907 }
908 /* If no page bits are significant, this devolves to tlb_flush. */
909 if (bits < TARGET_PAGE_BITS) {
910 tlb_flush_by_mmuidx_all_cpus_synced(src_cpu, idxmap);
911 return;
912 }
913
914 /* This should already be page aligned */
915 d.addr = addr & TARGET_PAGE_MASK;
916 d.idxmap = idxmap;
917 d.bits = bits;
918
919 if (encode_pbm_to_runon(&runon, d)) {
920 flush_all_helper(src_cpu, tlb_flush_page_bits_by_mmuidx_async_1, runon);
921 async_safe_run_on_cpu(src_cpu, tlb_flush_page_bits_by_mmuidx_async_1,
922 runon);
923 } else {
924 CPUState *dst_cpu;
925 TLBFlushPageBitsByMMUIdxData *p;
926
927 /* Allocate a separate data block for each destination cpu. */
928 CPU_FOREACH(dst_cpu) {
929 if (dst_cpu != src_cpu) {
930 p = g_new(TLBFlushPageBitsByMMUIdxData, 1);
931 *p = d;
932 async_run_on_cpu(dst_cpu, tlb_flush_page_bits_by_mmuidx_async_2,
933 RUN_ON_CPU_HOST_PTR(p));
934 }
935 }
936
937 p = g_new(TLBFlushPageBitsByMMUIdxData, 1);
938 *p = d;
939 async_safe_run_on_cpu(src_cpu, tlb_flush_page_bits_by_mmuidx_async_2,
940 RUN_ON_CPU_HOST_PTR(p));
941 }
942 }
943
944 /* update the TLBs so that writes to code in the virtual page 'addr'
945 can be detected */
946 void tlb_protect_code(ram_addr_t ram_addr)
947 {
948 cpu_physical_memory_test_and_clear_dirty(ram_addr, TARGET_PAGE_SIZE,
949 DIRTY_MEMORY_CODE);
950 }
951
952 /* update the TLB so that writes in the physical page 'ram_addr' are no
953    longer tested for self-modifying code */
954 void tlb_unprotect_code(ram_addr_t ram_addr)
955 {
956 cpu_physical_memory_set_dirty_flag(ram_addr, DIRTY_MEMORY_CODE);
957 }
958
959
960 /*
961 * Dirty write flag handling
962 *
963 * When the TCG code writes to a location it looks up the address in
964 * the TLB and uses that data to compute the final address. If any of
965 * the lower bits of the address are set then the slow path is forced.
966 * There are a number of reasons to do this but for normal RAM the
967 * most usual is detecting writes to code regions which may invalidate
968 * generated code.
969 *
970 * Other vCPUs might be reading their TLBs during guest execution, so we update
971 * te->addr_write with qatomic_set. We don't need to worry about this for
972 * oversized guests as MTTCG is disabled for them.
973 *
974 * Called with tlb_c.lock held.
975 */
976 static void tlb_reset_dirty_range_locked(CPUTLBEntry *tlb_entry,
977 uintptr_t start, uintptr_t length)
978 {
979 uintptr_t addr = tlb_entry->addr_write;
980
981 if ((addr & (TLB_INVALID_MASK | TLB_MMIO |
982 TLB_DISCARD_WRITE | TLB_NOTDIRTY)) == 0) {
983 addr &= TARGET_PAGE_MASK;
984 addr += tlb_entry->addend;
985 if ((addr - start) < length) {
986 #if TCG_OVERSIZED_GUEST
987 tlb_entry->addr_write |= TLB_NOTDIRTY;
988 #else
989 qatomic_set(&tlb_entry->addr_write,
990 tlb_entry->addr_write | TLB_NOTDIRTY);
991 #endif
992 }
993 }
994 }
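
/*
 * For example (illustrative): once a clean RAM page has
 * addr_write == (vaddr_page | TLB_NOTDIRTY), the fast-path comparison in
 * generated code no longer matches a page-aligned guest address, so stores
 * fall through to the slow path, which sees TLB_NOTDIRTY and calls
 * notdirty_write() before completing the access.
 */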
995
996 /*
997 * Called with tlb_c.lock held.
998 * Called only from the vCPU context, i.e. the TLB's owner thread.
999 */
1000 static inline void copy_tlb_helper_locked(CPUTLBEntry *d, const CPUTLBEntry *s)
1001 {
1002 *d = *s;
1003 }
1004
1005 /* This is a cross vCPU call (i.e. another vCPU resetting the flags of
1006 * the target vCPU).
1007 * We must take tlb_c.lock to avoid racing with another vCPU update. The only
1008 * thing actually updated is the target TLB entry ->addr_write flags.
1009 */
1010 void tlb_reset_dirty(CPUState *cpu, ram_addr_t start1, ram_addr_t length)
1011 {
1012 CPUArchState *env;
1013
1014 int mmu_idx;
1015
1016 env = cpu->env_ptr;
1017 qemu_spin_lock(&env_tlb(env)->c.lock);
1018 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
1019 unsigned int i;
1020 unsigned int n = tlb_n_entries(&env_tlb(env)->f[mmu_idx]);
1021
1022 for (i = 0; i < n; i++) {
1023 tlb_reset_dirty_range_locked(&env_tlb(env)->f[mmu_idx].table[i],
1024 start1, length);
1025 }
1026
1027 for (i = 0; i < CPU_VTLB_SIZE; i++) {
1028 tlb_reset_dirty_range_locked(&env_tlb(env)->d[mmu_idx].vtable[i],
1029 start1, length);
1030 }
1031 }
1032 qemu_spin_unlock(&env_tlb(env)->c.lock);
1033 }
1034
1035 /* Called with tlb_c.lock held */
1036 static inline void tlb_set_dirty1_locked(CPUTLBEntry *tlb_entry,
1037 target_ulong vaddr)
1038 {
1039 if (tlb_entry->addr_write == (vaddr | TLB_NOTDIRTY)) {
1040 tlb_entry->addr_write = vaddr;
1041 }
1042 }
1043
1044 /* update the TLB corresponding to virtual page vaddr
1045 so that it is no longer dirty */
1046 void tlb_set_dirty(CPUState *cpu, target_ulong vaddr)
1047 {
1048 CPUArchState *env = cpu->env_ptr;
1049 int mmu_idx;
1050
1051 assert_cpu_is_self(cpu);
1052
1053 vaddr &= TARGET_PAGE_MASK;
1054 qemu_spin_lock(&env_tlb(env)->c.lock);
1055 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
1056 tlb_set_dirty1_locked(tlb_entry(env, mmu_idx, vaddr), vaddr);
1057 }
1058
1059 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
1060 int k;
1061 for (k = 0; k < CPU_VTLB_SIZE; k++) {
1062 tlb_set_dirty1_locked(&env_tlb(env)->d[mmu_idx].vtable[k], vaddr);
1063 }
1064 }
1065 qemu_spin_unlock(&env_tlb(env)->c.lock);
1066 }
1067
1068 /* Our TLB does not support large pages, so remember the area covered by
1069 large pages and trigger a full TLB flush if these are invalidated. */
1070 static void tlb_add_large_page(CPUArchState *env, int mmu_idx,
1071 target_ulong vaddr, target_ulong size)
1072 {
1073 target_ulong lp_addr = env_tlb(env)->d[mmu_idx].large_page_addr;
1074 target_ulong lp_mask = ~(size - 1);
1075
1076 if (lp_addr == (target_ulong)-1) {
1077 /* No previous large page. */
1078 lp_addr = vaddr;
1079 } else {
1080 /* Extend the existing region to include the new page.
1081 This is a compromise between unnecessary flushes and
1082 the cost of maintaining a full variable size TLB. */
1083 lp_mask &= env_tlb(env)->d[mmu_idx].large_page_mask;
1084 while (((lp_addr ^ vaddr) & lp_mask) != 0) {
1085 lp_mask <<= 1;
1086 }
1087 }
1088 env_tlb(env)->d[mmu_idx].large_page_addr = lp_addr & lp_mask;
1089 env_tlb(env)->d[mmu_idx].large_page_mask = lp_mask;
1090 }
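
/*
 * Worked example of the mask widening above (illustrative addresses,
 * 2 MiB pages): an existing region large_page_addr = 0x40000000 with
 * large_page_mask = 0xffe00000, and a new page at vaddr = 0x40700000.
 * (0x40000000 ^ 0x40700000) & lp_mask is non-zero for 0xffe00000 and
 * 0xffc00000, and becomes zero at lp_mask = 0xff800000, so the tracked
 * region grows to 8 MiB: addr 0x40000000, mask 0xff800000.
 */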
1091
1092 /* Add a new TLB entry. At most one entry for a given virtual address
1093 * is permitted. Only a single TARGET_PAGE_SIZE region is mapped, the
1094 * supplied size is only used by tlb_flush_page.
1095 *
1096 * Called from TCG-generated code, which is under an RCU read-side
1097 * critical section.
1098 */
1099 void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
1100 hwaddr paddr, MemTxAttrs attrs, int prot,
1101 int mmu_idx, target_ulong size)
1102 {
1103 CPUArchState *env = cpu->env_ptr;
1104 CPUTLB *tlb = env_tlb(env);
1105 CPUTLBDesc *desc = &tlb->d[mmu_idx];
1106 MemoryRegionSection *section;
1107 unsigned int index;
1108 target_ulong address;
1109 target_ulong write_address;
1110 uintptr_t addend;
1111 CPUTLBEntry *te, tn;
1112 hwaddr iotlb, xlat, sz, paddr_page;
1113 target_ulong vaddr_page;
1114 int asidx = cpu_asidx_from_attrs(cpu, attrs);
1115 int wp_flags;
1116 bool is_ram, is_romd;
1117
1118 assert_cpu_is_self(cpu);
1119
1120 if (size <= TARGET_PAGE_SIZE) {
1121 sz = TARGET_PAGE_SIZE;
1122 } else {
1123 tlb_add_large_page(env, mmu_idx, vaddr, size);
1124 sz = size;
1125 }
1126 vaddr_page = vaddr & TARGET_PAGE_MASK;
1127 paddr_page = paddr & TARGET_PAGE_MASK;
1128
1129 section = address_space_translate_for_iotlb(cpu, asidx, paddr_page,
1130 &xlat, &sz, attrs, &prot);
1131 assert(sz >= TARGET_PAGE_SIZE);
1132
1133 tlb_debug("vaddr=" TARGET_FMT_lx " paddr=0x" TARGET_FMT_plx
1134 " prot=%x idx=%d\n",
1135 vaddr, paddr, prot, mmu_idx);
1136
1137 address = vaddr_page;
1138 if (size < TARGET_PAGE_SIZE) {
1139 /* Repeat the MMU check and TLB fill on every access. */
1140 address |= TLB_INVALID_MASK;
1141 }
1142 if (attrs.byte_swap) {
1143 address |= TLB_BSWAP;
1144 }
1145
1146 is_ram = memory_region_is_ram(section->mr);
1147 is_romd = memory_region_is_romd(section->mr);
1148
1149 if (is_ram || is_romd) {
1150 /* RAM and ROMD both have associated host memory. */
1151 addend = (uintptr_t)memory_region_get_ram_ptr(section->mr) + xlat;
1152 } else {
1153 /* I/O does not; force the host address to NULL. */
1154 addend = 0;
1155 }
1156
1157 write_address = address;
1158 if (is_ram) {
1159 iotlb = memory_region_get_ram_addr(section->mr) + xlat;
1160 /*
1161 * Computing is_clean is expensive; avoid all that unless
1162 * the page is actually writable.
1163 */
1164 if (prot & PAGE_WRITE) {
1165 if (section->readonly) {
1166 write_address |= TLB_DISCARD_WRITE;
1167 } else if (cpu_physical_memory_is_clean(iotlb)) {
1168 write_address |= TLB_NOTDIRTY;
1169 }
1170 }
1171 } else {
1172 /* I/O or ROMD */
1173 iotlb = memory_region_section_get_iotlb(cpu, section) + xlat;
1174 /*
1175 * Writes to romd devices must go through MMIO to enable write.
1176 * Reads to romd devices go through the ram_ptr found above,
1177 * but of course reads to I/O must go through MMIO.
1178 */
1179 write_address |= TLB_MMIO;
1180 if (!is_romd) {
1181 address = write_address;
1182 }
1183 }
1184
1185 wp_flags = cpu_watchpoint_address_matches(cpu, vaddr_page,
1186 TARGET_PAGE_SIZE);
1187
1188 index = tlb_index(env, mmu_idx, vaddr_page);
1189 te = tlb_entry(env, mmu_idx, vaddr_page);
1190
1191 /*
1192 * Hold the TLB lock for the rest of the function. We could acquire/release
1193 * the lock several times in the function, but it is faster to amortize the
1194 * acquisition cost by acquiring it just once. Note that this leads to
1195 * a longer critical section, but this is not a concern since the TLB lock
1196 * is unlikely to be contended.
1197 */
1198 qemu_spin_lock(&tlb->c.lock);
1199
1200 /* Note that the tlb is no longer clean. */
1201 tlb->c.dirty |= 1 << mmu_idx;
1202
1203 /* Make sure there's no cached translation for the new page. */
1204 tlb_flush_vtlb_page_locked(env, mmu_idx, vaddr_page);
1205
1206 /*
1207 * Only evict the old entry to the victim tlb if it's for a
1208 * different page; otherwise just overwrite the stale data.
1209 */
1210 if (!tlb_hit_page_anyprot(te, vaddr_page) && !tlb_entry_is_empty(te)) {
1211 unsigned vidx = desc->vindex++ % CPU_VTLB_SIZE;
1212 CPUTLBEntry *tv = &desc->vtable[vidx];
1213
1214 /* Evict the old entry into the victim tlb. */
1215 copy_tlb_helper_locked(tv, te);
1216 desc->viotlb[vidx] = desc->iotlb[index];
1217 tlb_n_used_entries_dec(env, mmu_idx);
1218 }
1219
1220 /* refill the tlb */
1221 /*
1222 * At this point iotlb contains a physical section number in the lower
1223 * TARGET_PAGE_BITS, and either
1224 * + the ram_addr_t of the page base of the target RAM (RAM)
1225 * + the offset within section->mr of the page base (I/O, ROMD)
1226 * We subtract the vaddr_page (which is page aligned and thus won't
1227 * disturb the low bits) to give an offset which can be added to the
1228 * (non-page-aligned) vaddr of the eventual memory access to get
1229 * the MemoryRegion offset for the access. Note that the vaddr we
1230 * subtract here is that of the page base, and not the same as the
1231 * vaddr we add back in io_readx()/io_writex()/get_page_addr_code().
1232 */
1233 desc->iotlb[index].addr = iotlb - vaddr_page;
1234 desc->iotlb[index].attrs = attrs;
1235
1236 /* Now calculate the new entry */
1237 tn.addend = addend - vaddr_page;
1238 if (prot & PAGE_READ) {
1239 tn.addr_read = address;
1240 if (wp_flags & BP_MEM_READ) {
1241 tn.addr_read |= TLB_WATCHPOINT;
1242 }
1243 } else {
1244 tn.addr_read = -1;
1245 }
1246
1247 if (prot & PAGE_EXEC) {
1248 tn.addr_code = address;
1249 } else {
1250 tn.addr_code = -1;
1251 }
1252
1253 tn.addr_write = -1;
1254 if (prot & PAGE_WRITE) {
1255 tn.addr_write = write_address;
1256 if (prot & PAGE_WRITE_INV) {
1257 tn.addr_write |= TLB_INVALID_MASK;
1258 }
1259 if (wp_flags & BP_MEM_WRITE) {
1260 tn.addr_write |= TLB_WATCHPOINT;
1261 }
1262 }
1263
1264 copy_tlb_helper_locked(te, &tn);
1265 tlb_n_used_entries_inc(env, mmu_idx);
1266 qemu_spin_unlock(&tlb->c.lock);
1267 }
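
/*
 * Illustrative example of the iotlb offset arithmetic above, for the RAM
 * case (made-up numbers): with vaddr_page = 0x00400000 and the page's
 * ram_addr base at 0x12340000, we store
 * iotlb[index].addr = 0x12340000 - 0x00400000 = 0x11f40000. A later store
 * to vaddr 0x00400abc then recovers the ram address in notdirty_write() as
 * mem_vaddr + iotlbentry->addr = 0x00400abc + 0x11f40000 = 0x12340abc.
 */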
1268
1269 /* Add a new TLB entry, but without specifying the memory
1270 * transaction attributes to be used.
1271 */
1272 void tlb_set_page(CPUState *cpu, target_ulong vaddr,
1273 hwaddr paddr, int prot,
1274 int mmu_idx, target_ulong size)
1275 {
1276 tlb_set_page_with_attrs(cpu, vaddr, paddr, MEMTXATTRS_UNSPECIFIED,
1277 prot, mmu_idx, size);
1278 }
1279
1280 static inline ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
1281 {
1282 ram_addr_t ram_addr;
1283
1284 ram_addr = qemu_ram_addr_from_host(ptr);
1285 if (ram_addr == RAM_ADDR_INVALID) {
1286 error_report("Bad ram pointer %p", ptr);
1287 abort();
1288 }
1289 return ram_addr;
1290 }
1291
1292 /*
1293 * Note: tlb_fill() can trigger a resize of the TLB. This means that all of the
1294 * caller's prior references to the TLB table (e.g. CPUTLBEntry pointers) must
1295 * be discarded and looked up again (e.g. via tlb_entry()).
1296 */
1297 static void tlb_fill(CPUState *cpu, target_ulong addr, int size,
1298 MMUAccessType access_type, int mmu_idx, uintptr_t retaddr)
1299 {
1300 CPUClass *cc = CPU_GET_CLASS(cpu);
1301 bool ok;
1302
1303 /*
1304 * This is not a probe, so only valid return is success; failure
1305 * should result in exception + longjmp to the cpu loop.
1306 */
1307 ok = cc->tcg_ops->tlb_fill(cpu, addr, size,
1308 access_type, mmu_idx, false, retaddr);
1309 assert(ok);
1310 }
1311
1312 static inline void cpu_unaligned_access(CPUState *cpu, vaddr addr,
1313 MMUAccessType access_type,
1314 int mmu_idx, uintptr_t retaddr)
1315 {
1316 CPUClass *cc = CPU_GET_CLASS(cpu);
1317
1318 cc->tcg_ops->do_unaligned_access(cpu, addr, access_type, mmu_idx, retaddr);
1319 }
1320
1321 static inline void cpu_transaction_failed(CPUState *cpu, hwaddr physaddr,
1322 vaddr addr, unsigned size,
1323 MMUAccessType access_type,
1324 int mmu_idx, MemTxAttrs attrs,
1325 MemTxResult response,
1326 uintptr_t retaddr)
1327 {
1328 CPUClass *cc = CPU_GET_CLASS(cpu);
1329
1330 if (!cpu->ignore_memory_transaction_failures &&
1331 cc->tcg_ops->do_transaction_failed) {
1332 cc->tcg_ops->do_transaction_failed(cpu, physaddr, addr, size,
1333 access_type, mmu_idx, attrs,
1334 response, retaddr);
1335 }
1336 }
1337
1338 static uint64_t io_readx(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
1339 int mmu_idx, target_ulong addr, uintptr_t retaddr,
1340 MMUAccessType access_type, MemOp op)
1341 {
1342 CPUState *cpu = env_cpu(env);
1343 hwaddr mr_offset;
1344 MemoryRegionSection *section;
1345 MemoryRegion *mr;
1346 uint64_t val;
1347 bool locked = false;
1348 MemTxResult r;
1349
1350 section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs);
1351 mr = section->mr;
1352 mr_offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
1353 cpu->mem_io_pc = retaddr;
1354 if (!cpu->can_do_io) {
1355 cpu_io_recompile(cpu, retaddr);
1356 }
1357
1358 if (!qemu_mutex_iothread_locked()) {
1359 qemu_mutex_lock_iothread();
1360 locked = true;
1361 }
1362 r = memory_region_dispatch_read(mr, mr_offset, &val, op, iotlbentry->attrs);
1363 if (r != MEMTX_OK) {
1364 hwaddr physaddr = mr_offset +
1365 section->offset_within_address_space -
1366 section->offset_within_region;
1367
1368 cpu_transaction_failed(cpu, physaddr, addr, memop_size(op), access_type,
1369 mmu_idx, iotlbentry->attrs, r, retaddr);
1370 }
1371 if (locked) {
1372 qemu_mutex_unlock_iothread();
1373 }
1374
1375 return val;
1376 }
1377
1378 /*
1379 * Save a potentially trashed IOTLB entry for later lookup by plugin.
1380 * This is read by tlb_plugin_lookup if the iotlb entry doesn't match
1381 * because of the side effect of io_writex changing memory layout.
1382 */
1383 static void save_iotlb_data(CPUState *cs, hwaddr addr,
1384 MemoryRegionSection *section, hwaddr mr_offset)
1385 {
1386 #ifdef CONFIG_PLUGIN
1387 SavedIOTLB *saved = &cs->saved_iotlb;
1388 saved->addr = addr;
1389 saved->section = section;
1390 saved->mr_offset = mr_offset;
1391 #endif
1392 }
1393
1394 static void io_writex(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
1395 int mmu_idx, uint64_t val, target_ulong addr,
1396 uintptr_t retaddr, MemOp op)
1397 {
1398 CPUState *cpu = env_cpu(env);
1399 hwaddr mr_offset;
1400 MemoryRegionSection *section;
1401 MemoryRegion *mr;
1402 bool locked = false;
1403 MemTxResult r;
1404
1405 section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs);
1406 mr = section->mr;
1407 mr_offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
1408 if (!cpu->can_do_io) {
1409 cpu_io_recompile(cpu, retaddr);
1410 }
1411 cpu->mem_io_pc = retaddr;
1412
1413 /*
1414 * The memory_region_dispatch may trigger a flush/resize
1415 * so for plugins we save the iotlb_data just in case.
1416 */
1417 save_iotlb_data(cpu, iotlbentry->addr, section, mr_offset);
1418
1419 if (!qemu_mutex_iothread_locked()) {
1420 qemu_mutex_lock_iothread();
1421 locked = true;
1422 }
1423 r = memory_region_dispatch_write(mr, mr_offset, val, op, iotlbentry->attrs);
1424 if (r != MEMTX_OK) {
1425 hwaddr physaddr = mr_offset +
1426 section->offset_within_address_space -
1427 section->offset_within_region;
1428
1429 cpu_transaction_failed(cpu, physaddr, addr, memop_size(op),
1430 MMU_DATA_STORE, mmu_idx, iotlbentry->attrs, r,
1431 retaddr);
1432 }
1433 if (locked) {
1434 qemu_mutex_unlock_iothread();
1435 }
1436 }
1437
1438 static inline target_ulong tlb_read_ofs(CPUTLBEntry *entry, size_t ofs)
1439 {
1440 #if TCG_OVERSIZED_GUEST
1441 return *(target_ulong *)((uintptr_t)entry + ofs);
1442 #else
1443 /* ofs might correspond to .addr_write, so use qatomic_read */
1444 return qatomic_read((target_ulong *)((uintptr_t)entry + ofs));
1445 #endif
1446 }
1447
1448 /* Return true if ADDR is present in the victim tlb, and has been copied
1449 back to the main tlb. */
1450 static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index,
1451 size_t elt_ofs, target_ulong page)
1452 {
1453 size_t vidx;
1454
1455 assert_cpu_is_self(env_cpu(env));
1456 for (vidx = 0; vidx < CPU_VTLB_SIZE; ++vidx) {
1457 CPUTLBEntry *vtlb = &env_tlb(env)->d[mmu_idx].vtable[vidx];
1458 target_ulong cmp;
1459
1460 /* elt_ofs might correspond to .addr_write, so use qatomic_read */
1461 #if TCG_OVERSIZED_GUEST
1462 cmp = *(target_ulong *)((uintptr_t)vtlb + elt_ofs);
1463 #else
1464 cmp = qatomic_read((target_ulong *)((uintptr_t)vtlb + elt_ofs));
1465 #endif
1466
1467 if (cmp == page) {
1468 /* Found entry in victim tlb, swap tlb and iotlb. */
1469 CPUTLBEntry tmptlb, *tlb = &env_tlb(env)->f[mmu_idx].table[index];
1470
1471 qemu_spin_lock(&env_tlb(env)->c.lock);
1472 copy_tlb_helper_locked(&tmptlb, tlb);
1473 copy_tlb_helper_locked(tlb, vtlb);
1474 copy_tlb_helper_locked(vtlb, &tmptlb);
1475 qemu_spin_unlock(&env_tlb(env)->c.lock);
1476
1477 CPUIOTLBEntry tmpio, *io = &env_tlb(env)->d[mmu_idx].iotlb[index];
1478 CPUIOTLBEntry *vio = &env_tlb(env)->d[mmu_idx].viotlb[vidx];
1479 tmpio = *io; *io = *vio; *vio = tmpio;
1480 return true;
1481 }
1482 }
1483 return false;
1484 }
1485
1486 /* Macro to call the above, with local variables from the use context. */
1487 #define VICTIM_TLB_HIT(TY, ADDR) \
1488 victim_tlb_hit(env, mmu_idx, index, offsetof(CPUTLBEntry, TY), \
1489 (ADDR) & TARGET_PAGE_MASK)
1490
1491 /*
1492 * Return a ram_addr_t for the virtual address for execution.
1493 *
1494 * Return -1 if we can't translate and execute from an entire page
1495 * of RAM. This will force us to execute by loading and translating
1496 * one insn at a time, without caching.
1497 *
1498 * NOTE: This function will trigger an exception if the page is
1499 * not executable.
1500 */
1501 tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, target_ulong addr,
1502 void **hostp)
1503 {
1504 uintptr_t mmu_idx = cpu_mmu_index(env, true);
1505 uintptr_t index = tlb_index(env, mmu_idx, addr);
1506 CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
1507 void *p;
1508
1509 if (unlikely(!tlb_hit(entry->addr_code, addr))) {
1510 if (!VICTIM_TLB_HIT(addr_code, addr)) {
1511 tlb_fill(env_cpu(env), addr, 0, MMU_INST_FETCH, mmu_idx, 0);
1512 index = tlb_index(env, mmu_idx, addr);
1513 entry = tlb_entry(env, mmu_idx, addr);
1514
1515 if (unlikely(entry->addr_code & TLB_INVALID_MASK)) {
1516 /*
1517 * The MMU protection covers a smaller range than a target
1518 * page, so we must redo the MMU check for every insn.
1519 */
1520 return -1;
1521 }
1522 }
1523 assert(tlb_hit(entry->addr_code, addr));
1524 }
1525
1526 if (unlikely(entry->addr_code & TLB_MMIO)) {
1527 /* The region is not backed by RAM. */
1528 if (hostp) {
1529 *hostp = NULL;
1530 }
1531 return -1;
1532 }
1533
1534 p = (void *)((uintptr_t)addr + entry->addend);
1535 if (hostp) {
1536 *hostp = p;
1537 }
1538 return qemu_ram_addr_from_host_nofail(p);
1539 }
1540
1541 tb_page_addr_t get_page_addr_code(CPUArchState *env, target_ulong addr)
1542 {
1543 return get_page_addr_code_hostp(env, addr, NULL);
1544 }
1545
1546 static void notdirty_write(CPUState *cpu, vaddr mem_vaddr, unsigned size,
1547 CPUIOTLBEntry *iotlbentry, uintptr_t retaddr)
1548 {
1549 ram_addr_t ram_addr = mem_vaddr + iotlbentry->addr;
1550
1551 trace_memory_notdirty_write_access(mem_vaddr, ram_addr, size);
1552
1553 if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
1554 struct page_collection *pages
1555 = page_collection_lock(ram_addr, ram_addr + size);
1556 tb_invalidate_phys_page_fast(pages, ram_addr, size, retaddr);
1557 page_collection_unlock(pages);
1558 }
1559
1560 /*
1561 * Set both VGA and migration bits for simplicity and to remove
1562 * the notdirty callback faster.
1563 */
1564 cpu_physical_memory_set_dirty_range(ram_addr, size, DIRTY_CLIENTS_NOCODE);
1565
1566 /* We remove the notdirty callback only if the code has been flushed. */
1567 if (!cpu_physical_memory_is_clean(ram_addr)) {
1568 trace_memory_notdirty_set_dirty(mem_vaddr);
1569 tlb_set_dirty(cpu, mem_vaddr);
1570 }
1571 }
1572
1573 static int probe_access_internal(CPUArchState *env, target_ulong addr,
1574 int fault_size, MMUAccessType access_type,
1575 int mmu_idx, bool nonfault,
1576 void **phost, uintptr_t retaddr)
1577 {
1578 uintptr_t index = tlb_index(env, mmu_idx, addr);
1579 CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
1580 target_ulong tlb_addr, page_addr;
1581 size_t elt_ofs;
1582 int flags;
1583
1584 switch (access_type) {
1585 case MMU_DATA_LOAD:
1586 elt_ofs = offsetof(CPUTLBEntry, addr_read);
1587 break;
1588 case MMU_DATA_STORE:
1589 elt_ofs = offsetof(CPUTLBEntry, addr_write);
1590 break;
1591 case MMU_INST_FETCH:
1592 elt_ofs = offsetof(CPUTLBEntry, addr_code);
1593 break;
1594 default:
1595 g_assert_not_reached();
1596 }
1597 tlb_addr = tlb_read_ofs(entry, elt_ofs);
1598
1599 page_addr = addr & TARGET_PAGE_MASK;
1600 if (!tlb_hit_page(tlb_addr, page_addr)) {
1601 if (!victim_tlb_hit(env, mmu_idx, index, elt_ofs, page_addr)) {
1602 CPUState *cs = env_cpu(env);
1603 CPUClass *cc = CPU_GET_CLASS(cs);
1604
1605 if (!cc->tcg_ops->tlb_fill(cs, addr, fault_size, access_type,
1606 mmu_idx, nonfault, retaddr)) {
1607 /* Non-faulting page table read failed. */
1608 *phost = NULL;
1609 return TLB_INVALID_MASK;
1610 }
1611
1612 /* TLB resize via tlb_fill may have moved the entry. */
1613 entry = tlb_entry(env, mmu_idx, addr);
1614 }
1615 tlb_addr = tlb_read_ofs(entry, elt_ofs);
1616 }
1617 flags = tlb_addr & TLB_FLAGS_MASK;
1618
1619 /* Fold all "mmio-like" bits into TLB_MMIO. This is not RAM. */
1620 if (unlikely(flags & ~(TLB_WATCHPOINT | TLB_NOTDIRTY))) {
1621 *phost = NULL;
1622 return TLB_MMIO;
1623 }
1624
1625 /* Everything else is RAM. */
1626 *phost = (void *)((uintptr_t)addr + entry->addend);
1627 return flags;
1628 }
1629
1630 int probe_access_flags(CPUArchState *env, target_ulong addr,
1631 MMUAccessType access_type, int mmu_idx,
1632 bool nonfault, void **phost, uintptr_t retaddr)
1633 {
1634 int flags;
1635
1636 flags = probe_access_internal(env, addr, 0, access_type, mmu_idx,
1637 nonfault, phost, retaddr);
1638
1639 /* Handle clean RAM pages. */
1640 if (unlikely(flags & TLB_NOTDIRTY)) {
1641 uintptr_t index = tlb_index(env, mmu_idx, addr);
1642 CPUIOTLBEntry *iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];
1643
1644 notdirty_write(env_cpu(env), addr, 1, iotlbentry, retaddr);
1645 flags &= ~TLB_NOTDIRTY;
1646 }
1647
1648 return flags;
1649 }
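
/*
 * Usage sketch (hypothetical caller, not part of this file): a target
 * helper that wants a host pointer for a RAM access without faulting on a
 * missing translation might do:
 *
 *     void *host;
 *     int flags = probe_access_flags(env, addr, MMU_DATA_LOAD,
 *                                    cpu_mmu_index(env, false),
 *                                    true, &host, GETPC());
 *     if (flags & TLB_INVALID_MASK) {
 *         ... non-faulting page table read failed, host is NULL ...
 *     } else if (!(flags & TLB_MMIO)) {
 *         ... plain RAM, host can be dereferenced directly ...
 *     }
 */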
1650
1651 void *probe_access(CPUArchState *env, target_ulong addr, int size,
1652 MMUAccessType access_type, int mmu_idx, uintptr_t retaddr)
1653 {
1654 void *host;
1655 int flags;
1656
1657 g_assert(-(addr | TARGET_PAGE_MASK) >= size);
1658
1659 flags = probe_access_internal(env, addr, size, access_type, mmu_idx,
1660 false, &host, retaddr);
1661
1662 /* Per the interface, size == 0 merely faults the access. */
1663 if (size == 0) {
1664 return NULL;
1665 }
1666
1667 if (unlikely(flags & (TLB_NOTDIRTY | TLB_WATCHPOINT))) {
1668 uintptr_t index = tlb_index(env, mmu_idx, addr);
1669 CPUIOTLBEntry *iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];
1670
1671 /* Handle watchpoints. */
1672 if (flags & TLB_WATCHPOINT) {
1673 int wp_access = (access_type == MMU_DATA_STORE
1674 ? BP_MEM_WRITE : BP_MEM_READ);
1675 cpu_check_watchpoint(env_cpu(env), addr, size,
1676 iotlbentry->attrs, wp_access, retaddr);
1677 }
1678
1679 /* Handle clean RAM pages. */
1680 if (flags & TLB_NOTDIRTY) {
1681 notdirty_write(env_cpu(env), addr, 1, iotlbentry, retaddr);
1682 }
1683 }
1684
1685 return host;
1686 }
1687
1688 void *tlb_vaddr_to_host(CPUArchState *env, abi_ptr addr,
1689 MMUAccessType access_type, int mmu_idx)
1690 {
1691 void *host;
1692 int flags;
1693
1694 flags = probe_access_internal(env, addr, 0, access_type,
1695 mmu_idx, true, &host, 0);
1696
1697     /* The caller does not expect any flags to be set; return NULL if any are. */
1698 return flags ? NULL : host;
1699 }
1700
1701 #ifdef CONFIG_PLUGIN
1702 /*
1703 * Perform a TLB lookup and populate the qemu_plugin_hwaddr structure.
1704 * This should be a hot path as we will have just looked this path up
1705 * in the softmmu lookup code (or helper). We don't handle re-fills or
1706 * checking the victim table. This is purely informational.
1707 *
1708 * This almost never fails as the memory access being instrumented
1709 * should have just filled the TLB. The one corner case is io_writex
1710 * which can cause TLB flushes and potential resizing of the TLBs
1711 * losing the information we need. In those cases we need to recover
1712 * data from a copy of the iotlbentry. As long as this always occurs
1713 * from the same thread (which a mem callback will be) this is safe.
1714 */
1715
1716 bool tlb_plugin_lookup(CPUState *cpu, target_ulong addr, int mmu_idx,
1717 bool is_store, struct qemu_plugin_hwaddr *data)
1718 {
1719 CPUArchState *env = cpu->env_ptr;
1720 CPUTLBEntry *tlbe = tlb_entry(env, mmu_idx, addr);
1721 uintptr_t index = tlb_index(env, mmu_idx, addr);
1722 target_ulong tlb_addr = is_store ? tlb_addr_write(tlbe) : tlbe->addr_read;
1723
1724 if (likely(tlb_hit(tlb_addr, addr))) {
1725 /* We must have an iotlb entry for MMIO */
1726 if (tlb_addr & TLB_MMIO) {
1727 CPUIOTLBEntry *iotlbentry;
1728 iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];
1729 data->is_io = true;
1730 data->v.io.section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs);
1731 data->v.io.offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
1732 } else {
1733 data->is_io = false;
1734 data->v.ram.hostaddr = addr + tlbe->addend;
1735 }
1736 return true;
1737 } else {
1738 SavedIOTLB *saved = &cpu->saved_iotlb;
1739 data->is_io = true;
1740 data->v.io.section = saved->section;
1741 data->v.io.offset = saved->mr_offset;
1742 return true;
1743 }
1744 }
1745
1746 #endif
1747
1748 /* Probe for a read-modify-write atomic operation. Do not allow unaligned
1749  * operations or I/O operations to proceed. Return the host address. */
1750 static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
1751 TCGMemOpIdx oi, uintptr_t retaddr)
1752 {
1753 size_t mmu_idx = get_mmuidx(oi);
1754 uintptr_t index = tlb_index(env, mmu_idx, addr);
1755 CPUTLBEntry *tlbe = tlb_entry(env, mmu_idx, addr);
1756 target_ulong tlb_addr = tlb_addr_write(tlbe);
1757 MemOp mop = get_memop(oi);
1758 int a_bits = get_alignment_bits(mop);
1759 int s_bits = mop & MO_SIZE;
1760 void *hostaddr;
1761
1762 /* Adjust the given return address. */
1763 retaddr -= GETPC_ADJ;
1764
1765 /* Enforce guest required alignment. */
1766 if (unlikely(a_bits > 0 && (addr & ((1 << a_bits) - 1)))) {
1767 /* ??? Maybe indicate atomic op to cpu_unaligned_access */
1768 cpu_unaligned_access(env_cpu(env), addr, MMU_DATA_STORE,
1769 mmu_idx, retaddr);
1770 }
1771
1772 /* Enforce qemu required alignment. */
1773 if (unlikely(addr & ((1 << s_bits) - 1))) {
1774 /* We get here if guest alignment was not requested,
1775 or was not enforced by cpu_unaligned_access above.
1776 We might widen the access and emulate, but for now
1777 mark an exception and exit the cpu loop. */
1778 goto stop_the_world;
1779 }
1780
1781 /* Check TLB entry and enforce page permissions. */
1782 if (!tlb_hit(tlb_addr, addr)) {
1783 if (!VICTIM_TLB_HIT(addr_write, addr)) {
1784 tlb_fill(env_cpu(env), addr, 1 << s_bits, MMU_DATA_STORE,
1785 mmu_idx, retaddr);
1786 index = tlb_index(env, mmu_idx, addr);
1787 tlbe = tlb_entry(env, mmu_idx, addr);
1788 }
1789 tlb_addr = tlb_addr_write(tlbe) & ~TLB_INVALID_MASK;
1790 }
1791
1792 /* Notice an I/O access or an access that still needs a full MMU lookup */
1793 if (unlikely(tlb_addr & TLB_MMIO)) {
1794 /* There's really nothing that can be done to
1795 support this apart from stop-the-world. */
1796 goto stop_the_world;
1797 }
1798
1799 /* Let the guest notice RMW on a write-only page. */
1800 if (unlikely(tlbe->addr_read != (tlb_addr & ~TLB_NOTDIRTY))) {
1801 tlb_fill(env_cpu(env), addr, 1 << s_bits, MMU_DATA_LOAD,
1802 mmu_idx, retaddr);
1803 /* Since we don't support reads and writes to different addresses,
1804 and we do have the proper page loaded for write, this shouldn't
1805 ever return. But just in case, handle via stop-the-world. */
1806 goto stop_the_world;
1807 }
1808
1809 hostaddr = (void *)((uintptr_t)addr + tlbe->addend);
1810
1811 if (unlikely(tlb_addr & TLB_NOTDIRTY)) {
1812 notdirty_write(env_cpu(env), addr, 1 << s_bits,
1813 &env_tlb(env)->d[mmu_idx].iotlb[index], retaddr);
1814 }
1815
1816 return hostaddr;
1817
1818 stop_the_world:
1819 cpu_loop_exit_atomic(env_cpu(env), retaddr);
1820 }
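
/*
 * Illustrative sketch, not part of the original source: the atomic helpers
 * generated from atomic_template.h (included later in this file) use the
 * lookup roughly as follows for a compare-and-swap; the exact expansion
 * depends on DATA_SIZE and the ATOMIC_MMU_* macros defined below.
 *
 *     uint32_t *haddr = atomic_mmu_lookup(env, addr, oi, retaddr);
 *     uint32_t ret = qatomic_cmpxchg__nocheck(haddr, cmpv, newv);
 *
 * i.e. once the address is known to be aligned, writable, dirty RAM, the
 * operation is performed directly on the host mapping.
 */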
1821
1822 /*
1823 * Load Helpers
1824 *
1825 * We support two different access types. SOFTMMU_CODE_ACCESS is
1826 * specifically for reading instructions from system memory. It is
1827 * called by the translation loop and in some helpers where the code
1828 * is disassembled. It shouldn't be called directly by guest code.
1829 */
1830
1831 typedef uint64_t FullLoadHelper(CPUArchState *env, target_ulong addr,
1832 TCGMemOpIdx oi, uintptr_t retaddr);
1833
1834 static inline uint64_t QEMU_ALWAYS_INLINE
1835 load_memop(const void *haddr, MemOp op)
1836 {
1837 switch (op) {
1838 case MO_UB:
1839 return ldub_p(haddr);
1840 case MO_BEUW:
1841 return lduw_be_p(haddr);
1842 case MO_LEUW:
1843 return lduw_le_p(haddr);
1844 case MO_BEUL:
1845 return (uint32_t)ldl_be_p(haddr);
1846 case MO_LEUL:
1847 return (uint32_t)ldl_le_p(haddr);
1848 case MO_BEQ:
1849 return ldq_be_p(haddr);
1850 case MO_LEQ:
1851 return ldq_le_p(haddr);
1852 default:
1853 qemu_build_not_reached();
1854 }
1855 }
1856
1857 static inline uint64_t QEMU_ALWAYS_INLINE
1858 load_helper(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi,
1859 uintptr_t retaddr, MemOp op, bool code_read,
1860 FullLoadHelper *full_load)
1861 {
1862 uintptr_t mmu_idx = get_mmuidx(oi);
1863 uintptr_t index = tlb_index(env, mmu_idx, addr);
1864 CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
1865 target_ulong tlb_addr = code_read ? entry->addr_code : entry->addr_read;
1866 const size_t tlb_off = code_read ?
1867 offsetof(CPUTLBEntry, addr_code) : offsetof(CPUTLBEntry, addr_read);
1868 const MMUAccessType access_type =
1869 code_read ? MMU_INST_FETCH : MMU_DATA_LOAD;
1870 unsigned a_bits = get_alignment_bits(get_memop(oi));
1871 void *haddr;
1872 uint64_t res;
1873 size_t size = memop_size(op);
1874
1875 /* Handle CPU specific unaligned behaviour */
1876 if (addr & ((1 << a_bits) - 1)) {
1877 cpu_unaligned_access(env_cpu(env), addr, access_type,
1878 mmu_idx, retaddr);
1879 }
1880
1881 /* If the TLB entry is for a different page, reload and try again. */
1882 if (!tlb_hit(tlb_addr, addr)) {
1883 if (!victim_tlb_hit(env, mmu_idx, index, tlb_off,
1884 addr & TARGET_PAGE_MASK)) {
1885 tlb_fill(env_cpu(env), addr, size,
1886 access_type, mmu_idx, retaddr);
1887 index = tlb_index(env, mmu_idx, addr);
1888 entry = tlb_entry(env, mmu_idx, addr);
1889 }
1890 tlb_addr = code_read ? entry->addr_code : entry->addr_read;
1891 tlb_addr &= ~TLB_INVALID_MASK;
1892 }
1893
1894 /* Handle anything that isn't just a straight memory access. */
1895 if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
1896 CPUIOTLBEntry *iotlbentry;
1897 bool need_swap;
1898
1899 /* For anything that is unaligned, recurse through full_load. */
1900 if ((addr & (size - 1)) != 0) {
1901 goto do_unaligned_access;
1902 }
1903
1904 iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];
1905
1906 /* Handle watchpoints. */
1907 if (unlikely(tlb_addr & TLB_WATCHPOINT)) {
1908 /* On watchpoint hit, this will longjmp out. */
1909 cpu_check_watchpoint(env_cpu(env), addr, size,
1910 iotlbentry->attrs, BP_MEM_READ, retaddr);
1911 }
1912
1913 need_swap = size > 1 && (tlb_addr & TLB_BSWAP);
1914
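        /*
         * XOR-ing the MemOp with MO_BSWAP flips its endianness, e.g.
         * MO_BEUL ^ MO_BSWAP == MO_LEUL, so a TLB_BSWAP page is accessed
         * with the opposite byte order below.
         */
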
1915 /* Handle I/O access. */
1916 if (likely(tlb_addr & TLB_MMIO)) {
1917 return io_readx(env, iotlbentry, mmu_idx, addr, retaddr,
1918 access_type, op ^ (need_swap * MO_BSWAP));
1919 }
1920
1921 haddr = (void *)((uintptr_t)addr + entry->addend);
1922
1923 /*
1924 * Keep these two load_memop calls separate to ensure that the compiler
1925 * is able to fold the entire function to a single instruction.
1926 * There is a build-time assert inside to remind you of this. ;-)
1927 */
1928 if (unlikely(need_swap)) {
1929 return load_memop(haddr, op ^ MO_BSWAP);
1930 }
1931 return load_memop(haddr, op);
1932 }
1933
1934 /* Handle slow unaligned access (it spans two pages or IO). */
1935 if (size > 1
1936 && unlikely((addr & ~TARGET_PAGE_MASK) + size - 1
1937 >= TARGET_PAGE_SIZE)) {
1938 target_ulong addr1, addr2;
1939 uint64_t r1, r2;
1940 unsigned shift;
1941 do_unaligned_access:
1942 addr1 = addr & ~((target_ulong)size - 1);
1943 addr2 = addr1 + size;
1944 r1 = full_load(env, addr1, oi, retaddr);
1945 r2 = full_load(env, addr2, oi, retaddr);
1946 shift = (addr & (size - 1)) * 8;
1947
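        /*
         * Worked example (illustrative): for a 4-byte little-endian load
         * with (addr & 3) == 2, shift == 16; r1 holds bytes [addr-2, addr+1]
         * and r2 holds bytes [addr+2, addr+5], so the wanted value is
         * (r1 >> 16) | (r2 << 16), masked to 32 bits below.
         */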
1948 if (memop_big_endian(op)) {
1949 /* Big-endian combine. */
1950 res = (r1 << shift) | (r2 >> ((size * 8) - shift));
1951 } else {
1952 /* Little-endian combine. */
1953 res = (r1 >> shift) | (r2 << ((size * 8) - shift));
1954 }
1955 return res & MAKE_64BIT_MASK(0, size * 8);
1956 }
1957
1958 haddr = (void *)((uintptr_t)addr + entry->addend);
1959 return load_memop(haddr, op);
1960 }
1961
1962 /*
1963 * For the benefit of TCG generated code, we want to avoid the
1964 * complication of ABI-specific return type promotion and always
1965 * return a value extended to the register size of the host. This is
1966 * tcg_target_long, except in the case of a 32-bit host and 64-bit
1967 * data, and for that we always have uint64_t.
1968 *
1969 * We don't bother with this widened value for SOFTMMU_CODE_ACCESS.
1970 */
1971
1972 static uint64_t full_ldub_mmu(CPUArchState *env, target_ulong addr,
1973 TCGMemOpIdx oi, uintptr_t retaddr)
1974 {
1975 return load_helper(env, addr, oi, retaddr, MO_UB, false, full_ldub_mmu);
1976 }
1977
1978 tcg_target_ulong helper_ret_ldub_mmu(CPUArchState *env, target_ulong addr,
1979 TCGMemOpIdx oi, uintptr_t retaddr)
1980 {
1981 return full_ldub_mmu(env, addr, oi, retaddr);
1982 }
1983
1984 static uint64_t full_le_lduw_mmu(CPUArchState *env, target_ulong addr,
1985 TCGMemOpIdx oi, uintptr_t retaddr)
1986 {
1987 return load_helper(env, addr, oi, retaddr, MO_LEUW, false,
1988 full_le_lduw_mmu);
1989 }
1990
1991 tcg_target_ulong helper_le_lduw_mmu(CPUArchState *env, target_ulong addr,
1992 TCGMemOpIdx oi, uintptr_t retaddr)
1993 {
1994 return full_le_lduw_mmu(env, addr, oi, retaddr);
1995 }
1996
1997 static uint64_t full_be_lduw_mmu(CPUArchState *env, target_ulong addr,
1998 TCGMemOpIdx oi, uintptr_t retaddr)
1999 {
2000 return load_helper(env, addr, oi, retaddr, MO_BEUW, false,
2001 full_be_lduw_mmu);
2002 }
2003
2004 tcg_target_ulong helper_be_lduw_mmu(CPUArchState *env, target_ulong addr,
2005 TCGMemOpIdx oi, uintptr_t retaddr)
2006 {
2007 return full_be_lduw_mmu(env, addr, oi, retaddr);
2008 }
2009
2010 static uint64_t full_le_ldul_mmu(CPUArchState *env, target_ulong addr,
2011 TCGMemOpIdx oi, uintptr_t retaddr)
2012 {
2013 return load_helper(env, addr, oi, retaddr, MO_LEUL, false,
2014 full_le_ldul_mmu);
2015 }
2016
2017 tcg_target_ulong helper_le_ldul_mmu(CPUArchState *env, target_ulong addr,
2018 TCGMemOpIdx oi, uintptr_t retaddr)
2019 {
2020 return full_le_ldul_mmu(env, addr, oi, retaddr);
2021 }
2022
2023 static uint64_t full_be_ldul_mmu(CPUArchState *env, target_ulong addr,
2024 TCGMemOpIdx oi, uintptr_t retaddr)
2025 {
2026 return load_helper(env, addr, oi, retaddr, MO_BEUL, false,
2027 full_be_ldul_mmu);
2028 }
2029
2030 tcg_target_ulong helper_be_ldul_mmu(CPUArchState *env, target_ulong addr,
2031 TCGMemOpIdx oi, uintptr_t retaddr)
2032 {
2033 return full_be_ldul_mmu(env, addr, oi, retaddr);
2034 }
2035
2036 uint64_t helper_le_ldq_mmu(CPUArchState *env, target_ulong addr,
2037 TCGMemOpIdx oi, uintptr_t retaddr)
2038 {
2039 return load_helper(env, addr, oi, retaddr, MO_LEQ, false,
2040 helper_le_ldq_mmu);
2041 }
2042
2043 uint64_t helper_be_ldq_mmu(CPUArchState *env, target_ulong addr,
2044 TCGMemOpIdx oi, uintptr_t retaddr)
2045 {
2046 return load_helper(env, addr, oi, retaddr, MO_BEQ, false,
2047 helper_be_ldq_mmu);
2048 }
2049
2050 /*
2051 * Provide signed versions of the load routines as well. We can of course
2052 * avoid this for 64-bit data, or for 32-bit data on a 32-bit host.
2053 */
2054
2055
2056 tcg_target_ulong helper_ret_ldsb_mmu(CPUArchState *env, target_ulong addr,
2057 TCGMemOpIdx oi, uintptr_t retaddr)
2058 {
2059 return (int8_t)helper_ret_ldub_mmu(env, addr, oi, retaddr);
2060 }
2061
2062 tcg_target_ulong helper_le_ldsw_mmu(CPUArchState *env, target_ulong addr,
2063 TCGMemOpIdx oi, uintptr_t retaddr)
2064 {
2065 return (int16_t)helper_le_lduw_mmu(env, addr, oi, retaddr);
2066 }
2067
2068 tcg_target_ulong helper_be_ldsw_mmu(CPUArchState *env, target_ulong addr,
2069 TCGMemOpIdx oi, uintptr_t retaddr)
2070 {
2071 return (int16_t)helper_be_lduw_mmu(env, addr, oi, retaddr);
2072 }
2073
2074 tcg_target_ulong helper_le_ldsl_mmu(CPUArchState *env, target_ulong addr,
2075 TCGMemOpIdx oi, uintptr_t retaddr)
2076 {
2077 return (int32_t)helper_le_ldul_mmu(env, addr, oi, retaddr);
2078 }
2079
2080 tcg_target_ulong helper_be_ldsl_mmu(CPUArchState *env, target_ulong addr,
2081 TCGMemOpIdx oi, uintptr_t retaddr)
2082 {
2083 return (int32_t)helper_be_ldul_mmu(env, addr, oi, retaddr);
2084 }
2085
2086 /*
2087 * Load helpers for cpu_ldst.h.
2088 */
2089
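/*
 * Illustrative sketch, not part of the original source: a target helper
 * (the name below is hypothetical) would typically call these wrappers
 * with GETPC() so that a TLB fill can unwind back to the guest insn:
 *
 *     uint64_t helper_example_load(CPUArchState *env, target_ulong ptr)
 *     {
 *         return cpu_ldq_le_mmuidx_ra(env, ptr, cpu_mmu_index(env, false),
 *                                     GETPC());
 *     }
 */
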
2090 static inline uint64_t cpu_load_helper(CPUArchState *env, abi_ptr addr,
2091 int mmu_idx, uintptr_t retaddr,
2092 MemOp op, FullLoadHelper *full_load)
2093 {
2094 uint16_t meminfo;
2095 TCGMemOpIdx oi;
2096 uint64_t ret;
2097
2098 meminfo = trace_mem_get_info(op, mmu_idx, false);
2099 trace_guest_mem_before_exec(env_cpu(env), addr, meminfo);
2100
2101 op &= ~MO_SIGN;
2102 oi = make_memop_idx(op, mmu_idx);
2103 ret = full_load(env, addr, oi, retaddr);
2104
2105 qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, meminfo);
2106
2107 return ret;
2108 }
2109
2110 uint32_t cpu_ldub_mmuidx_ra(CPUArchState *env, abi_ptr addr,
2111 int mmu_idx, uintptr_t ra)
2112 {
2113 return cpu_load_helper(env, addr, mmu_idx, ra, MO_UB, full_ldub_mmu);
2114 }
2115
2116 int cpu_ldsb_mmuidx_ra(CPUArchState *env, abi_ptr addr,
2117 int mmu_idx, uintptr_t ra)
2118 {
2119 return (int8_t)cpu_load_helper(env, addr, mmu_idx, ra, MO_SB,
2120 full_ldub_mmu);
2121 }
2122
2123 uint32_t cpu_lduw_be_mmuidx_ra(CPUArchState *env, abi_ptr addr,
2124 int mmu_idx, uintptr_t ra)
2125 {
2126 return cpu_load_helper(env, addr, mmu_idx, ra, MO_BEUW, full_be_lduw_mmu);
2127 }
2128
2129 int cpu_ldsw_be_mmuidx_ra(CPUArchState *env, abi_ptr addr,
2130 int mmu_idx, uintptr_t ra)
2131 {
2132 return (int16_t)cpu_load_helper(env, addr, mmu_idx, ra, MO_BESW,
2133 full_be_lduw_mmu);
2134 }
2135
2136 uint32_t cpu_ldl_be_mmuidx_ra(CPUArchState *env, abi_ptr addr,
2137 int mmu_idx, uintptr_t ra)
2138 {
2139 return cpu_load_helper(env, addr, mmu_idx, ra, MO_BEUL, full_be_ldul_mmu);
2140 }
2141
2142 uint64_t cpu_ldq_be_mmuidx_ra(CPUArchState *env, abi_ptr addr,
2143 int mmu_idx, uintptr_t ra)
2144 {
2145 return cpu_load_helper(env, addr, mmu_idx, ra, MO_BEQ, helper_be_ldq_mmu);
2146 }
2147
2148 uint32_t cpu_lduw_le_mmuidx_ra(CPUArchState *env, abi_ptr addr,
2149 int mmu_idx, uintptr_t ra)
2150 {
2151 return cpu_load_helper(env, addr, mmu_idx, ra, MO_LEUW, full_le_lduw_mmu);
2152 }
2153
2154 int cpu_ldsw_le_mmuidx_ra(CPUArchState *env, abi_ptr addr,
2155 int mmu_idx, uintptr_t ra)
2156 {
2157 return (int16_t)cpu_load_helper(env, addr, mmu_idx, ra, MO_LESW,
2158 full_le_lduw_mmu);
2159 }
2160
2161 uint32_t cpu_ldl_le_mmuidx_ra(CPUArchState *env, abi_ptr addr,
2162 int mmu_idx, uintptr_t ra)
2163 {
2164 return cpu_load_helper(env, addr, mmu_idx, ra, MO_LEUL, full_le_ldul_mmu);
2165 }
2166
2167 uint64_t cpu_ldq_le_mmuidx_ra(CPUArchState *env, abi_ptr addr,
2168 int mmu_idx, uintptr_t ra)
2169 {
2170 return cpu_load_helper(env, addr, mmu_idx, ra, MO_LEQ, helper_le_ldq_mmu);
2171 }
2172
2173 uint32_t cpu_ldub_data_ra(CPUArchState *env, target_ulong ptr,
2174 uintptr_t retaddr)
2175 {
2176 return cpu_ldub_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
2177 }
2178
2179 int cpu_ldsb_data_ra(CPUArchState *env, target_ulong ptr, uintptr_t retaddr)
2180 {
2181 return cpu_ldsb_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
2182 }
2183
2184 uint32_t cpu_lduw_be_data_ra(CPUArchState *env, target_ulong ptr,
2185 uintptr_t retaddr)
2186 {
2187 return cpu_lduw_be_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
2188 }
2189
2190 int cpu_ldsw_be_data_ra(CPUArchState *env, target_ulong ptr, uintptr_t retaddr)
2191 {
2192 return cpu_ldsw_be_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
2193 }
2194
2195 uint32_t cpu_ldl_be_data_ra(CPUArchState *env, target_ulong ptr,
2196 uintptr_t retaddr)
2197 {
2198 return cpu_ldl_be_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
2199 }
2200
2201 uint64_t cpu_ldq_be_data_ra(CPUArchState *env, target_ulong ptr,
2202 uintptr_t retaddr)
2203 {
2204 return cpu_ldq_be_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
2205 }
2206
2207 uint32_t cpu_lduw_le_data_ra(CPUArchState *env, target_ulong ptr,
2208 uintptr_t retaddr)
2209 {
2210 return cpu_lduw_le_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
2211 }
2212
2213 int cpu_ldsw_le_data_ra(CPUArchState *env, target_ulong ptr, uintptr_t retaddr)
2214 {
2215 return cpu_ldsw_le_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
2216 }
2217
2218 uint32_t cpu_ldl_le_data_ra(CPUArchState *env, target_ulong ptr,
2219 uintptr_t retaddr)
2220 {
2221 return cpu_ldl_le_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
2222 }
2223
2224 uint64_t cpu_ldq_le_data_ra(CPUArchState *env, target_ulong ptr,
2225 uintptr_t retaddr)
2226 {
2227 return cpu_ldq_le_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
2228 }
2229
2230 uint32_t cpu_ldub_data(CPUArchState *env, target_ulong ptr)
2231 {
2232 return cpu_ldub_data_ra(env, ptr, 0);
2233 }
2234
2235 int cpu_ldsb_data(CPUArchState *env, target_ulong ptr)
2236 {
2237 return cpu_ldsb_data_ra(env, ptr, 0);
2238 }
2239
2240 uint32_t cpu_lduw_be_data(CPUArchState *env, target_ulong ptr)
2241 {
2242 return cpu_lduw_be_data_ra(env, ptr, 0);
2243 }
2244
2245 int cpu_ldsw_be_data(CPUArchState *env, target_ulong ptr)
2246 {
2247 return cpu_ldsw_be_data_ra(env, ptr, 0);
2248 }
2249
2250 uint32_t cpu_ldl_be_data(CPUArchState *env, target_ulong ptr)
2251 {
2252 return cpu_ldl_be_data_ra(env, ptr, 0);
2253 }
2254
2255 uint64_t cpu_ldq_be_data(CPUArchState *env, target_ulong ptr)
2256 {
2257 return cpu_ldq_be_data_ra(env, ptr, 0);
2258 }
2259
2260 uint32_t cpu_lduw_le_data(CPUArchState *env, target_ulong ptr)
2261 {
2262 return cpu_lduw_le_data_ra(env, ptr, 0);
2263 }
2264
2265 int cpu_ldsw_le_data(CPUArchState *env, target_ulong ptr)
2266 {
2267 return cpu_ldsw_le_data_ra(env, ptr, 0);
2268 }
2269
2270 uint32_t cpu_ldl_le_data(CPUArchState *env, target_ulong ptr)
2271 {
2272 return cpu_ldl_le_data_ra(env, ptr, 0);
2273 }
2274
2275 uint64_t cpu_ldq_le_data(CPUArchState *env, target_ulong ptr)
2276 {
2277 return cpu_ldq_le_data_ra(env, ptr, 0);
2278 }
2279
2280 /*
2281 * Store Helpers
2282 */
2283
2284 static inline void QEMU_ALWAYS_INLINE
2285 store_memop(void *haddr, uint64_t val, MemOp op)
2286 {
2287 switch (op) {
2288 case MO_UB:
2289 stb_p(haddr, val);
2290 break;
2291 case MO_BEUW:
2292 stw_be_p(haddr, val);
2293 break;
2294 case MO_LEUW:
2295 stw_le_p(haddr, val);
2296 break;
2297 case MO_BEUL:
2298 stl_be_p(haddr, val);
2299 break;
2300 case MO_LEUL:
2301 stl_le_p(haddr, val);
2302 break;
2303 case MO_BEQ:
2304 stq_be_p(haddr, val);
2305 break;
2306 case MO_LEQ:
2307 stq_le_p(haddr, val);
2308 break;
2309 default:
2310 qemu_build_not_reached();
2311 }
2312 }
2313
2314 static void __attribute__((noinline))
2315 store_helper_unaligned(CPUArchState *env, target_ulong addr, uint64_t val,
2316 uintptr_t retaddr, size_t size, uintptr_t mmu_idx,
2317 bool big_endian)
2318 {
2319 const size_t tlb_off = offsetof(CPUTLBEntry, addr_write);
2320 uintptr_t index, index2;
2321 CPUTLBEntry *entry, *entry2;
2322 target_ulong page2, tlb_addr, tlb_addr2;
2323 TCGMemOpIdx oi;
2324 size_t size2;
2325 int i;
2326
2327 /*
2328 * Ensure the second page is in the TLB. Note that the first page
2329 * is already guaranteed to be filled, and that the second page
2330 * cannot evict the first.
2331 */
2332 page2 = (addr + size) & TARGET_PAGE_MASK;
2333 size2 = (addr + size) & ~TARGET_PAGE_MASK;
2334 index2 = tlb_index(env, mmu_idx, page2);
2335 entry2 = tlb_entry(env, mmu_idx, page2);
2336
2337 tlb_addr2 = tlb_addr_write(entry2);
2338 if (!tlb_hit_page(tlb_addr2, page2)) {
2339 if (!victim_tlb_hit(env, mmu_idx, index2, tlb_off, page2)) {
2340 tlb_fill(env_cpu(env), page2, size2, MMU_DATA_STORE,
2341 mmu_idx, retaddr);
2342 index2 = tlb_index(env, mmu_idx, page2);
2343 entry2 = tlb_entry(env, mmu_idx, page2);
2344 }
2345 tlb_addr2 = tlb_addr_write(entry2);
2346 }
2347
2348 index = tlb_index(env, mmu_idx, addr);
2349 entry = tlb_entry(env, mmu_idx, addr);
2350 tlb_addr = tlb_addr_write(entry);
2351
2352 /*
2353 * Handle watchpoints. Since this may trap, all checks
2354 * must happen before any store.
2355 */
2356 if (unlikely(tlb_addr & TLB_WATCHPOINT)) {
2357 cpu_check_watchpoint(env_cpu(env), addr, size - size2,
2358 env_tlb(env)->d[mmu_idx].iotlb[index].attrs,
2359 BP_MEM_WRITE, retaddr);
2360 }
2361 if (unlikely(tlb_addr2 & TLB_WATCHPOINT)) {
2362 cpu_check_watchpoint(env_cpu(env), page2, size2,
2363 env_tlb(env)->d[mmu_idx].iotlb[index2].attrs,
2364 BP_MEM_WRITE, retaddr);
2365 }
2366
2367 /*
2368 * XXX: not efficient, but simple.
2369 * This loop must go in the forward direction to avoid issues
2370 * with self-modifying code in Windows 64-bit.
2371 */
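    /*
     * Worked example (illustrative): storing a 4-byte val byte by byte,
     * the big-endian loop emits val >> 24, >> 16, >> 8, >> 0 at
     * addr..addr+3, while the little-endian loop emits val >> 0, >> 8,
     * >> 16, >> 24.
     */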
2372 oi = make_memop_idx(MO_UB, mmu_idx);
2373 if (big_endian) {
2374 for (i = 0; i < size; ++i) {
2375 /* Big-endian extract. */
2376 uint8_t val8 = val >> (((size - 1) * 8) - (i * 8));
2377 helper_ret_stb_mmu(env, addr + i, val8, oi, retaddr);
2378 }
2379 } else {
2380 for (i = 0; i < size; ++i) {
2381 /* Little-endian extract. */
2382 uint8_t val8 = val >> (i * 8);
2383 helper_ret_stb_mmu(env, addr + i, val8, oi, retaddr);
2384 }
2385 }
2386 }
2387
2388 static inline void QEMU_ALWAYS_INLINE
2389 store_helper(CPUArchState *env, target_ulong addr, uint64_t val,
2390 TCGMemOpIdx oi, uintptr_t retaddr, MemOp op)
2391 {
2392 uintptr_t mmu_idx = get_mmuidx(oi);
2393 uintptr_t index = tlb_index(env, mmu_idx, addr);
2394 CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
2395 target_ulong tlb_addr = tlb_addr_write(entry);
2396 const size_t tlb_off = offsetof(CPUTLBEntry, addr_write);
2397 unsigned a_bits = get_alignment_bits(get_memop(oi));
2398 void *haddr;
2399 size_t size = memop_size(op);
2400
2401 /* Handle CPU specific unaligned behaviour */
2402 if (addr & ((1 << a_bits) - 1)) {
2403 cpu_unaligned_access(env_cpu(env), addr, MMU_DATA_STORE,
2404 mmu_idx, retaddr);
2405 }
2406
2407 /* If the TLB entry is for a different page, reload and try again. */
2408 if (!tlb_hit(tlb_addr, addr)) {
2409 if (!victim_tlb_hit(env, mmu_idx, index, tlb_off,
2410 addr & TARGET_PAGE_MASK)) {
2411 tlb_fill(env_cpu(env), addr, size, MMU_DATA_STORE,
2412 mmu_idx, retaddr);
2413 index = tlb_index(env, mmu_idx, addr);
2414 entry = tlb_entry(env, mmu_idx, addr);
2415 }
2416 tlb_addr = tlb_addr_write(entry) & ~TLB_INVALID_MASK;
2417 }
2418
2419 /* Handle anything that isn't just a straight memory access. */
2420 if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
2421 CPUIOTLBEntry *iotlbentry;
2422 bool need_swap;
2423
2424 /* For anything that is unaligned, recurse through byte stores. */
2425 if ((addr & (size - 1)) != 0) {
2426 goto do_unaligned_access;
2427 }
2428
2429 iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];
2430
2431 /* Handle watchpoints. */
2432 if (unlikely(tlb_addr & TLB_WATCHPOINT)) {
2433 /* On watchpoint hit, this will longjmp out. */
2434 cpu_check_watchpoint(env_cpu(env), addr, size,
2435 iotlbentry->attrs, BP_MEM_WRITE, retaddr);
2436 }
2437
2438 need_swap = size > 1 && (tlb_addr & TLB_BSWAP);
2439
2440 /* Handle I/O access. */
2441 if (tlb_addr & TLB_MMIO) {
2442 io_writex(env, iotlbentry, mmu_idx, val, addr, retaddr,
2443 op ^ (need_swap * MO_BSWAP));
2444 return;
2445 }
2446
2447 /* Ignore writes to ROM. */
2448 if (unlikely(tlb_addr & TLB_DISCARD_WRITE)) {
2449 return;
2450 }
2451
2452 /* Handle clean RAM pages. */
2453 if (tlb_addr & TLB_NOTDIRTY) {
2454 notdirty_write(env_cpu(env), addr, size, iotlbentry, retaddr);
2455 }
2456
2457 haddr = (void *)((uintptr_t)addr + entry->addend);
2458
2459 /*
2460 * Keep these two store_memop calls separate to ensure that the compiler
2461 * is able to fold the entire function to a single instruction.
2462 * There is a build-time assert inside to remind you of this. ;-)
2463 */
2464 if (unlikely(need_swap)) {
2465 store_memop(haddr, val, op ^ MO_BSWAP);
2466 } else {
2467 store_memop(haddr, val, op);
2468 }
2469 return;
2470 }
2471
2472 /* Handle slow unaligned access (it spans two pages or IO). */
2473 if (size > 1
2474 && unlikely((addr & ~TARGET_PAGE_MASK) + size - 1
2475 >= TARGET_PAGE_SIZE)) {
2476 do_unaligned_access:
2477 store_helper_unaligned(env, addr, val, retaddr, size,
2478 mmu_idx, memop_big_endian(op));
2479 return;
2480 }
2481
2482 haddr = (void *)((uintptr_t)addr + entry->addend);
2483 store_memop(haddr, val, op);
2484 }
2485
2486 void __attribute__((noinline))
2487 helper_ret_stb_mmu(CPUArchState *env, target_ulong addr, uint8_t val,
2488 TCGMemOpIdx oi, uintptr_t retaddr)
2489 {
2490 store_helper(env, addr, val, oi, retaddr, MO_UB);
2491 }
2492
2493 void helper_le_stw_mmu(CPUArchState *env, target_ulong addr, uint16_t val,
2494 TCGMemOpIdx oi, uintptr_t retaddr)
2495 {
2496 store_helper(env, addr, val, oi, retaddr, MO_LEUW);
2497 }
2498
2499 void helper_be_stw_mmu(CPUArchState *env, target_ulong addr, uint16_t val,
2500 TCGMemOpIdx oi, uintptr_t retaddr)
2501 {
2502 store_helper(env, addr, val, oi, retaddr, MO_BEUW);
2503 }
2504
2505 void helper_le_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
2506 TCGMemOpIdx oi, uintptr_t retaddr)
2507 {
2508 store_helper(env, addr, val, oi, retaddr, MO_LEUL);
2509 }
2510
2511 void helper_be_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
2512 TCGMemOpIdx oi, uintptr_t retaddr)
2513 {
2514 store_helper(env, addr, val, oi, retaddr, MO_BEUL);
2515 }
2516
2517 void helper_le_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
2518 TCGMemOpIdx oi, uintptr_t retaddr)
2519 {
2520 store_helper(env, addr, val, oi, retaddr, MO_LEQ);
2521 }
2522
2523 void helper_be_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
2524 TCGMemOpIdx oi, uintptr_t retaddr)
2525 {
2526 store_helper(env, addr, val, oi, retaddr, MO_BEQ);
2527 }
2528
2529 /*
2530 * Store Helpers for cpu_ldst.h
2531 */
2532
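/*
 * Illustrative sketch, not part of the original source: mirroring the load
 * side, a hypothetical target helper would store through these wrappers
 * with GETPC() as the return address:
 *
 *     void helper_example_store(CPUArchState *env, target_ulong ptr,
 *                               uint32_t val)
 *     {
 *         cpu_stl_le_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false),
 *                              GETPC());
 *     }
 *
 * The *_data_ra and *_data variants below simply fill in the current data
 * mmu_idx and, for *_data, a zero return address.
 */
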
2533 static inline void QEMU_ALWAYS_INLINE
2534 cpu_store_helper(CPUArchState *env, target_ulong addr, uint64_t val,
2535 int mmu_idx, uintptr_t retaddr, MemOp op)
2536 {
2537 TCGMemOpIdx oi;
2538 uint16_t meminfo;
2539
2540 meminfo = trace_mem_get_info(op, mmu_idx, true);
2541 trace_guest_mem_before_exec(env_cpu(env), addr, meminfo);
2542
2543 oi = make_memop_idx(op, mmu_idx);
2544 store_helper(env, addr, val, oi, retaddr, op);
2545
2546 qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, meminfo);
2547 }
2548
2549 void cpu_stb_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val,
2550 int mmu_idx, uintptr_t retaddr)
2551 {
2552 cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_UB);
2553 }
2554
2555 void cpu_stw_be_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val,
2556 int mmu_idx, uintptr_t retaddr)
2557 {
2558 cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_BEUW);
2559 }
2560
2561 void cpu_stl_be_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val,
2562 int mmu_idx, uintptr_t retaddr)
2563 {
2564 cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_BEUL);
2565 }
2566
2567 void cpu_stq_be_mmuidx_ra(CPUArchState *env, target_ulong addr, uint64_t val,
2568 int mmu_idx, uintptr_t retaddr)
2569 {
2570 cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_BEQ);
2571 }
2572
2573 void cpu_stw_le_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val,
2574 int mmu_idx, uintptr_t retaddr)
2575 {
2576 cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_LEUW);
2577 }
2578
2579 void cpu_stl_le_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val,
2580 int mmu_idx, uintptr_t retaddr)
2581 {
2582 cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_LEUL);
2583 }
2584
2585 void cpu_stq_le_mmuidx_ra(CPUArchState *env, target_ulong addr, uint64_t val,
2586 int mmu_idx, uintptr_t retaddr)
2587 {
2588 cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_LEQ);
2589 }
2590
2591 void cpu_stb_data_ra(CPUArchState *env, target_ulong ptr,
2592 uint32_t val, uintptr_t retaddr)
2593 {
2594 cpu_stb_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr);
2595 }
2596
2597 void cpu_stw_be_data_ra(CPUArchState *env, target_ulong ptr,
2598 uint32_t val, uintptr_t retaddr)
2599 {
2600 cpu_stw_be_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr);
2601 }
2602
2603 void cpu_stl_be_data_ra(CPUArchState *env, target_ulong ptr,
2604 uint32_t val, uintptr_t retaddr)
2605 {
2606 cpu_stl_be_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr);
2607 }
2608
2609 void cpu_stq_be_data_ra(CPUArchState *env, target_ulong ptr,
2610 uint64_t val, uintptr_t retaddr)
2611 {
2612 cpu_stq_be_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr);
2613 }
2614
2615 void cpu_stw_le_data_ra(CPUArchState *env, target_ulong ptr,
2616 uint32_t val, uintptr_t retaddr)
2617 {
2618 cpu_stw_le_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr);
2619 }
2620
2621 void cpu_stl_le_data_ra(CPUArchState *env, target_ulong ptr,
2622 uint32_t val, uintptr_t retaddr)
2623 {
2624 cpu_stl_le_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr);
2625 }
2626
2627 void cpu_stq_le_data_ra(CPUArchState *env, target_ulong ptr,
2628 uint64_t val, uintptr_t retaddr)
2629 {
2630 cpu_stq_le_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr);
2631 }
2632
2633 void cpu_stb_data(CPUArchState *env, target_ulong ptr, uint32_t val)
2634 {
2635 cpu_stb_data_ra(env, ptr, val, 0);
2636 }
2637
2638 void cpu_stw_be_data(CPUArchState *env, target_ulong ptr, uint32_t val)
2639 {
2640 cpu_stw_be_data_ra(env, ptr, val, 0);
2641 }
2642
2643 void cpu_stl_be_data(CPUArchState *env, target_ulong ptr, uint32_t val)
2644 {
2645 cpu_stl_be_data_ra(env, ptr, val, 0);
2646 }
2647
2648 void cpu_stq_be_data(CPUArchState *env, target_ulong ptr, uint64_t val)
2649 {
2650 cpu_stq_be_data_ra(env, ptr, val, 0);
2651 }
2652
2653 void cpu_stw_le_data(CPUArchState *env, target_ulong ptr, uint32_t val)
2654 {
2655 cpu_stw_le_data_ra(env, ptr, val, 0);
2656 }
2657
2658 void cpu_stl_le_data(CPUArchState *env, target_ulong ptr, uint32_t val)
2659 {
2660 cpu_stl_le_data_ra(env, ptr, val, 0);
2661 }
2662
2663 void cpu_stq_le_data(CPUArchState *env, target_ulong ptr, uint64_t val)
2664 {
2665 cpu_stq_le_data_ra(env, ptr, val, 0);
2666 }
2667
2668 /* The first set of helpers allows OI and RETADDR to be passed in, which
2669 makes them callable from other helpers. */
2670
2671 #define EXTRA_ARGS , TCGMemOpIdx oi, uintptr_t retaddr
2672 #define ATOMIC_NAME(X) \
2673 HELPER(glue(glue(glue(atomic_ ## X, SUFFIX), END), _mmu))
2674 #define ATOMIC_MMU_DECLS
2675 #define ATOMIC_MMU_LOOKUP atomic_mmu_lookup(env, addr, oi, retaddr)
2676 #define ATOMIC_MMU_CLEANUP
2677 #define ATOMIC_MMU_IDX get_mmuidx(oi)
2678
2679 #include "atomic_common.c.inc"
2680
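/*
 * Illustrative note (an assumption based on the macros above, since
 * atomic_template.h itself is not shown here): each DATA_SIZE expansion
 * generates helpers whose names are built by ATOMIC_NAME, e.g. for
 * DATA_SIZE 4 a little-endian compare-and-swap becomes roughly
 *
 *     uint32_t helper_atomic_cmpxchgl_le_mmu(CPUArchState *env,
 *                                            target_ulong addr,
 *                                            uint32_t cmpv, uint32_t newv,
 *                                            TCGMemOpIdx oi,
 *                                            uintptr_t retaddr);
 *
 * with the body performing ATOMIC_MMU_LOOKUP and the host atomic op.
 */
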
2681 #define DATA_SIZE 1
2682 #include "atomic_template.h"
2683
2684 #define DATA_SIZE 2
2685 #include "atomic_template.h"
2686
2687 #define DATA_SIZE 4
2688 #include "atomic_template.h"
2689
2690 #ifdef CONFIG_ATOMIC64
2691 #define DATA_SIZE 8
2692 #include "atomic_template.h"
2693 #endif
2694
2695 #if HAVE_CMPXCHG128 || HAVE_ATOMIC128
2696 #define DATA_SIZE 16
2697 #include "atomic_template.h"
2698 #endif
2699
2700 /* The second set of helpers is directly callable from TCG-generated code. */
2701
2702 #undef EXTRA_ARGS
2703 #undef ATOMIC_NAME
2704 #undef ATOMIC_MMU_LOOKUP
2705 #define EXTRA_ARGS , TCGMemOpIdx oi
2706 #define ATOMIC_NAME(X) HELPER(glue(glue(atomic_ ## X, SUFFIX), END))
2707 #define ATOMIC_MMU_LOOKUP atomic_mmu_lookup(env, addr, oi, GETPC())
2708
2709 #define DATA_SIZE 1
2710 #include "atomic_template.h"
2711
2712 #define DATA_SIZE 2
2713 #include "atomic_template.h"
2714
2715 #define DATA_SIZE 4
2716 #include "atomic_template.h"
2717
2718 #ifdef CONFIG_ATOMIC64
2719 #define DATA_SIZE 8
2720 #include "atomic_template.h"
2721 #endif
2722 #undef ATOMIC_MMU_IDX
2723
2724 /* Code access functions. */
2725
2726 static uint64_t full_ldub_code(CPUArchState *env, target_ulong addr,
2727 TCGMemOpIdx oi, uintptr_t retaddr)
2728 {
2729 return load_helper(env, addr, oi, retaddr, MO_8, true, full_ldub_code);
2730 }
2731
2732 uint32_t cpu_ldub_code(CPUArchState *env, abi_ptr addr)
2733 {
2734 TCGMemOpIdx oi = make_memop_idx(MO_UB, cpu_mmu_index(env, true));
2735 return full_ldub_code(env, addr, oi, 0);
2736 }
2737
2738 static uint64_t full_lduw_code(CPUArchState *env, target_ulong addr,
2739 TCGMemOpIdx oi, uintptr_t retaddr)
2740 {
2741 return load_helper(env, addr, oi, retaddr, MO_TEUW, true, full_lduw_code);
2742 }
2743
2744 uint32_t cpu_lduw_code(CPUArchState *env, abi_ptr addr)
2745 {
2746 TCGMemOpIdx oi = make_memop_idx(MO_TEUW, cpu_mmu_index(env, true));
2747 return full_lduw_code(env, addr, oi, 0);
2748 }
2749
2750 static uint64_t full_ldl_code(CPUArchState *env, target_ulong addr,
2751 TCGMemOpIdx oi, uintptr_t retaddr)
2752 {
2753 return load_helper(env, addr, oi, retaddr, MO_TEUL, true, full_ldl_code);
2754 }
2755
2756 uint32_t cpu_ldl_code(CPUArchState *env, abi_ptr addr)
2757 {
2758 TCGMemOpIdx oi = make_memop_idx(MO_TEUL, cpu_mmu_index(env, true));
2759 return full_ldl_code(env, addr, oi, 0);
2760 }
2761
2762 static uint64_t full_ldq_code(CPUArchState *env, target_ulong addr,
2763 TCGMemOpIdx oi, uintptr_t retaddr)
2764 {
2765 return load_helper(env, addr, oi, retaddr, MO_TEQ, true, full_ldq_code);
2766 }
2767
2768 uint64_t cpu_ldq_code(CPUArchState *env, abi_ptr addr)
2769 {
2770 TCGMemOpIdx oi = make_memop_idx(MO_TEQ, cpu_mmu_index(env, true));
2771 return full_ldq_code(env, addr, oi, 0);
2772 }
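
/*
 * Illustrative sketch, not part of the original source: a target's
 * instruction decoder (in its translate.c) typically fetches opcodes
 * through these code-access helpers, e.g. for a fixed 32-bit encoding:
 *
 *     uint32_t insn = cpu_ldl_code(env, pc);
 *
 * which goes through full_ldl_code() above with MMU_INST_FETCH semantics
 * and the code mmu_idx from cpu_mmu_index(env, true).
 */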