/*
 *  PowerPC Radix MMU emulation helpers for QEMU.
 *
 *  Copyright (c) 2016 Suraj Jitindar Singh, IBM Corporation
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
20 #include "qemu/osdep.h"
22 #include "exec/exec-all.h"
23 #include "qemu/error-report.h"
24 #include "sysemu/kvm.h"
28 #include "mmu-radix64.h"
29 #include "mmu-book3s-v3.h"
31 static bool ppc_radix64_get_fully_qualified_addr(const CPUPPCState
*env
,
33 uint64_t *lpid
, uint64_t *pid
)
35 /* When EA(2:11) are nonzero, raise a segment interrupt */
36 if (eaddr
& ~R_EADDR_VALID_MASK
) {
40 if (FIELD_EX64(env
->msr
, MSR
, HV
)) { /* MSR[HV] -> Hypervisor/bare metal */
41 switch (eaddr
& R_EADDR_QUADRANT
) {
42 case R_EADDR_QUADRANT0
:
44 *pid
= env
->spr
[SPR_BOOKS_PID
];
46 case R_EADDR_QUADRANT1
:
47 *lpid
= env
->spr
[SPR_LPIDR
];
48 *pid
= env
->spr
[SPR_BOOKS_PID
];
50 case R_EADDR_QUADRANT2
:
51 *lpid
= env
->spr
[SPR_LPIDR
];
54 case R_EADDR_QUADRANT3
:
59 g_assert_not_reached();
61 } else { /* !MSR[HV] -> Guest */
62 switch (eaddr
& R_EADDR_QUADRANT
) {
63 case R_EADDR_QUADRANT0
: /* Guest application */
64 *lpid
= env
->spr
[SPR_LPIDR
];
65 *pid
= env
->spr
[SPR_BOOKS_PID
];
67 case R_EADDR_QUADRANT1
: /* Illegal */
68 case R_EADDR_QUADRANT2
:
70 case R_EADDR_QUADRANT3
: /* Guest OS */
71 *lpid
= env
->spr
[SPR_LPIDR
];
72 *pid
= 0; /* pid set to 0 -> addresses guest operating system */
75 g_assert_not_reached();
82 static void ppc_radix64_raise_segi(PowerPCCPU
*cpu
, MMUAccessType access_type
,
85 CPUState
*cs
= CPU(cpu
);
86 CPUPPCState
*env
= &cpu
->env
;
88 switch (access_type
) {
90 /* Instruction Segment Interrupt */
91 cs
->exception_index
= POWERPC_EXCP_ISEG
;
95 /* Data Segment Interrupt */
96 cs
->exception_index
= POWERPC_EXCP_DSEG
;
97 env
->spr
[SPR_DAR
] = eaddr
;
100 g_assert_not_reached();
105 static inline const char *access_str(MMUAccessType access_type
)
107 return access_type
== MMU_DATA_LOAD
? "reading" :
108 (access_type
== MMU_DATA_STORE
? "writing" : "execute");
111 static void ppc_radix64_raise_si(PowerPCCPU
*cpu
, MMUAccessType access_type
,
112 vaddr eaddr
, uint32_t cause
)
114 CPUState
*cs
= CPU(cpu
);
115 CPUPPCState
*env
= &cpu
->env
;
117 qemu_log_mask(CPU_LOG_MMU
, "%s for %s @0x%"VADDR_PRIx
" cause %08x\n",
118 __func__
, access_str(access_type
),
121 switch (access_type
) {
123 /* Instruction Storage Interrupt */
124 cs
->exception_index
= POWERPC_EXCP_ISI
;
125 env
->error_code
= cause
;
128 cause
|= DSISR_ISSTORE
;
131 /* Data Storage Interrupt */
132 cs
->exception_index
= POWERPC_EXCP_DSI
;
133 env
->spr
[SPR_DSISR
] = cause
;
134 env
->spr
[SPR_DAR
] = eaddr
;
138 g_assert_not_reached();
142 static void ppc_radix64_raise_hsi(PowerPCCPU
*cpu
, MMUAccessType access_type
,
143 vaddr eaddr
, hwaddr g_raddr
, uint32_t cause
)
145 CPUState
*cs
= CPU(cpu
);
146 CPUPPCState
*env
= &cpu
->env
;
149 if (cause
& DSISR_PRTABLE_FAULT
) {
150 /* HDSI PRTABLE_FAULT gets the originating access type in error_code */
151 env
->error_code
= access_type
;
152 access_type
= MMU_DATA_LOAD
;
155 qemu_log_mask(CPU_LOG_MMU
, "%s for %s @0x%"VADDR_PRIx
" 0x%"
156 HWADDR_PRIx
" cause %08x\n",
157 __func__
, access_str(access_type
),
158 eaddr
, g_raddr
, cause
);
160 switch (access_type
) {
162 /* H Instruction Storage Interrupt */
163 cs
->exception_index
= POWERPC_EXCP_HISI
;
164 env
->spr
[SPR_ASDR
] = g_raddr
;
165 env
->error_code
= cause
;
168 cause
|= DSISR_ISSTORE
;
171 /* H Data Storage Interrupt */
172 cs
->exception_index
= POWERPC_EXCP_HDSI
;
173 env
->spr
[SPR_HDSISR
] = cause
;
174 env
->spr
[SPR_HDAR
] = eaddr
;
175 env
->spr
[SPR_ASDR
] = g_raddr
;
178 g_assert_not_reached();
182 static bool ppc_radix64_check_prot(PowerPCCPU
*cpu
, MMUAccessType access_type
,
183 uint64_t pte
, int *fault_cause
, int *prot
,
184 int mmu_idx
, bool partition_scoped
)
186 CPUPPCState
*env
= &cpu
->env
;
189 /* Check Page Attributes (pte58:59) */
190 if ((pte
& R_PTE_ATT
) == R_PTE_ATT_NI_IO
&& access_type
== MMU_INST_FETCH
) {
192 * Radix PTE entries with the non-idempotent I/O attribute are treated
195 *fault_cause
|= SRR1_NOEXEC_GUARD
;
199 /* Determine permissions allowed by Encoded Access Authority */
200 if (!partition_scoped
&& (pte
& R_PTE_EAA_PRIV
) &&
201 FIELD_EX64(env
->msr
, MSR
, PR
)) {
203 } else if (mmuidx_pr(mmu_idx
) || (pte
& R_PTE_EAA_PRIV
) ||
205 *prot
= ppc_radix64_get_prot_eaa(pte
);
206 } else { /* !MSR_PR && !(pte & R_PTE_EAA_PRIV) && !partition_scoped */
207 *prot
= ppc_radix64_get_prot_eaa(pte
);
208 *prot
&= ppc_radix64_get_prot_amr(cpu
); /* Least combined permissions */
211 /* Check if requested access type is allowed */
212 need_prot
= prot_for_access_type(access_type
);
213 if (need_prot
& ~*prot
) { /* Page Protected for that Access */
214 *fault_cause
|= access_type
== MMU_INST_FETCH
? SRR1_NOEXEC_GUARD
:
222 static void ppc_radix64_set_rc(PowerPCCPU
*cpu
, MMUAccessType access_type
,
223 uint64_t pte
, hwaddr pte_addr
, int *prot
)
225 CPUState
*cs
= CPU(cpu
);
228 npte
= pte
| R_PTE_R
; /* Always set reference bit */
230 if (access_type
== MMU_DATA_STORE
) { /* Store/Write */
231 npte
|= R_PTE_C
; /* Set change bit */
234 * Treat the page as read-only for now, so that a later write
235 * will pass through this function again to set the C bit.
237 *prot
&= ~PAGE_WRITE
;
240 if (pte
^ npte
) { /* If pte has changed then write it back */
241 stq_phys(cs
->as
, pte_addr
, npte
);
245 static bool ppc_radix64_is_valid_level(int level
, int psize
, uint64_t nls
)
250 * Check if this is a valid level, according to POWER9 and POWER10
251 * Processor User's Manuals, sections 4.10.4.1 and 5.10.6.1, respectively:
252 * Supported Radix Tree Configurations and Resulting Page Sizes.
254 * Note: these checks are specific to POWER9 and POWER10 CPUs. Any future
255 * CPUs that supports a different Radix MMU configuration will need their
256 * own implementation.
259 case 0: /* Root Page Dir */
260 ret
= psize
== 52 && nls
== 13;
267 ret
= nls
== 9 || nls
== 5;
273 if (unlikely(!ret
)) {
274 qemu_log_mask(LOG_GUEST_ERROR
, "invalid radix configuration: "
275 "level %d size %d nls %"PRIu64
"\n",
281 static int ppc_radix64_next_level(AddressSpace
*as
, vaddr eaddr
,
282 uint64_t *pte_addr
, uint64_t *nls
,
283 int *psize
, uint64_t *pte
, int *fault_cause
)
285 uint64_t index
, mask
, nlb
, pde
;
287 /* Read page <directory/table> entry from guest address space */
288 pde
= ldq_phys(as
, *pte_addr
);
289 if (!(pde
& R_PTE_VALID
)) { /* Invalid Entry */
290 *fault_cause
|= DSISR_NOPTE
;
296 if (!(pde
& R_PTE_LEAF
)) { /* Prepare for next iteration */
297 *nls
= pde
& R_PDE_NLS
;
298 index
= eaddr
>> (*psize
- *nls
); /* Shift */
299 index
&= ((1UL << *nls
) - 1); /* Mask */
300 nlb
= pde
& R_PDE_NLB
;
301 mask
= MAKE_64BIT_MASK(0, *nls
+ 3);
304 qemu_log_mask(LOG_GUEST_ERROR
,
305 "%s: misaligned page dir/table base: 0x"TARGET_FMT_lx
306 " page dir size: 0x"TARGET_FMT_lx
"\n",
307 __func__
, nlb
, mask
+ 1);
310 *pte_addr
= nlb
+ index
* sizeof(pde
);
315 static int ppc_radix64_walk_tree(AddressSpace
*as
, vaddr eaddr
,
316 uint64_t base_addr
, uint64_t nls
,
317 hwaddr
*raddr
, int *psize
, uint64_t *pte
,
318 int *fault_cause
, hwaddr
*pte_addr
)
320 uint64_t index
, pde
, rpn
, mask
;
323 index
= eaddr
>> (*psize
- nls
); /* Shift */
324 index
&= ((1UL << nls
) - 1); /* Mask */
325 mask
= MAKE_64BIT_MASK(0, nls
+ 3);
327 if (base_addr
& mask
) {
328 qemu_log_mask(LOG_GUEST_ERROR
,
329 "%s: misaligned page dir base: 0x"TARGET_FMT_lx
330 " page dir size: 0x"TARGET_FMT_lx
"\n",
331 __func__
, base_addr
, mask
+ 1);
334 *pte_addr
= base_addr
+ index
* sizeof(pde
);
339 if (!ppc_radix64_is_valid_level(level
++, *psize
, nls
)) {
340 *fault_cause
|= DSISR_R_BADCONFIG
;
344 ret
= ppc_radix64_next_level(as
, eaddr
, pte_addr
, &nls
, psize
, &pde
,
349 } while (!(pde
& R_PTE_LEAF
));
352 rpn
= pde
& R_PTE_RPN
;
353 mask
= (1UL << *psize
) - 1;
355 /* Or high bits of rpn and low bits to ea to form whole real addr */
356 *raddr
= (rpn
& ~mask
) | (eaddr
& mask
);
360 static bool validate_pate(PowerPCCPU
*cpu
, uint64_t lpid
, ppc_v3_pate_t
*pate
)
362 CPUPPCState
*env
= &cpu
->env
;
364 if (!(pate
->dw0
& PATE0_HR
)) {
367 if (lpid
== 0 && !FIELD_EX64(env
->msr
, MSR
, HV
)) {
370 if ((pate
->dw0
& PATE1_R_PRTS
) < 5) {
373 /* More checks ... */
377 static int ppc_radix64_partition_scoped_xlate(PowerPCCPU
*cpu
,
378 MMUAccessType orig_access_type
,
379 vaddr eaddr
, hwaddr g_raddr
,
381 hwaddr
*h_raddr
, int *h_prot
,
382 int *h_page_size
, bool pde_addr
,
383 int mmu_idx
, bool guest_visible
)
385 MMUAccessType access_type
= orig_access_type
;
392 * Translation of process-scoped tables/directories is performed as
395 access_type
= MMU_DATA_LOAD
;
398 qemu_log_mask(CPU_LOG_MMU
, "%s for %s @0x%"VADDR_PRIx
399 " mmu_idx %u 0x%"HWADDR_PRIx
"\n",
400 __func__
, access_str(access_type
),
401 eaddr
, mmu_idx
, g_raddr
);
403 *h_page_size
= PRTBE_R_GET_RTS(pate
.dw0
);
404 /* No valid pte or access denied due to protection */
405 if (ppc_radix64_walk_tree(CPU(cpu
)->as
, g_raddr
, pate
.dw0
& PRTBE_R_RPDB
,
406 pate
.dw0
& PRTBE_R_RPDS
, h_raddr
, h_page_size
,
407 &pte
, &fault_cause
, &pte_addr
) ||
408 ppc_radix64_check_prot(cpu
, access_type
, pte
,
409 &fault_cause
, h_prot
, mmu_idx
, true)) {
410 if (pde_addr
) { /* address being translated was that of a guest pde */
411 fault_cause
|= DSISR_PRTABLE_FAULT
;
414 ppc_radix64_raise_hsi(cpu
, orig_access_type
,
415 eaddr
, g_raddr
, fault_cause
);
421 ppc_radix64_set_rc(cpu
, access_type
, pte
, pte_addr
, h_prot
);
428 * The spapr vhc has a flat partition scope provided by qemu memory when
431 * When running a nested guest, the addressing is 2-level radix on top of the
432 * vhc memory, so it works practically identically to the bare metal 2-level
433 * radix. So that code is selected directly. A cleaner and more flexible nested
434 * hypervisor implementation would allow the vhc to provide a ->nested_xlate()
435 * function but that is not required for the moment.
437 static bool vhyp_flat_addressing(PowerPCCPU
*cpu
)
440 return !vhyp_cpu_in_nested(cpu
);
445 static int ppc_radix64_process_scoped_xlate(PowerPCCPU
*cpu
,
446 MMUAccessType access_type
,
447 vaddr eaddr
, uint64_t pid
,
448 ppc_v3_pate_t pate
, hwaddr
*g_raddr
,
449 int *g_prot
, int *g_page_size
,
450 int mmu_idx
, bool guest_visible
)
452 CPUState
*cs
= CPU(cpu
);
453 CPUPPCState
*env
= &cpu
->env
;
454 uint64_t offset
, size
, prtb
, prtbe_addr
, prtbe0
, base_addr
, nls
, index
, pte
;
455 int fault_cause
= 0, h_page_size
, h_prot
;
456 hwaddr h_raddr
, pte_addr
;
459 qemu_log_mask(CPU_LOG_MMU
, "%s for %s @0x%"VADDR_PRIx
460 " mmu_idx %u pid %"PRIu64
"\n",
461 __func__
, access_str(access_type
),
462 eaddr
, mmu_idx
, pid
);
464 prtb
= (pate
.dw1
& PATE1_R_PRTB
);
465 size
= 1ULL << ((pate
.dw1
& PATE1_R_PRTS
) + 12);
466 if (prtb
& (size
- 1)) {
467 /* Process Table not properly aligned */
469 ppc_radix64_raise_si(cpu
, access_type
, eaddr
, DSISR_R_BADCONFIG
);
474 /* Index Process Table by PID to Find Corresponding Process Table Entry */
475 offset
= pid
* sizeof(struct prtb_entry
);
476 if (offset
>= size
) {
477 /* offset exceeds size of the process table */
479 ppc_radix64_raise_si(cpu
, access_type
, eaddr
, DSISR_NOPTE
);
483 prtbe_addr
= prtb
+ offset
;
485 if (vhyp_flat_addressing(cpu
)) {
486 prtbe0
= ldq_phys(cs
->as
, prtbe_addr
);
489 * Process table addresses are subject to partition-scoped
492 * On a Radix host, the partition-scoped page table for LPID=0
493 * is only used to translate the effective addresses of the
494 * process table entries.
496 /* mmu_idx is 5 because we're translating from hypervisor scope */
497 ret
= ppc_radix64_partition_scoped_xlate(cpu
, access_type
, eaddr
,
498 prtbe_addr
, pate
, &h_raddr
,
499 &h_prot
, &h_page_size
, true,
504 prtbe0
= ldq_phys(cs
->as
, h_raddr
);
507 /* Walk Radix Tree from Process Table Entry to Convert EA to RA */
508 *g_page_size
= PRTBE_R_GET_RTS(prtbe0
);
509 base_addr
= prtbe0
& PRTBE_R_RPDB
;
510 nls
= prtbe0
& PRTBE_R_RPDS
;
511 if (FIELD_EX64(env
->msr
, MSR
, HV
) || vhyp_flat_addressing(cpu
)) {
513 * Can treat process table addresses as real addresses
515 ret
= ppc_radix64_walk_tree(cs
->as
, eaddr
& R_EADDR_MASK
, base_addr
,
516 nls
, g_raddr
, g_page_size
, &pte
,
517 &fault_cause
, &pte_addr
);
521 ppc_radix64_raise_si(cpu
, access_type
, eaddr
, fault_cause
);
529 index
= (eaddr
& R_EADDR_MASK
) >> (*g_page_size
- nls
); /* Shift */
530 index
&= ((1UL << nls
) - 1); /* Mask */
531 pte_addr
= base_addr
+ (index
* sizeof(pte
));
534 * Each process table address is subject to a partition-scoped
538 /* mmu_idx is 5 because we're translating from hypervisor scope */
539 ret
= ppc_radix64_partition_scoped_xlate(cpu
, access_type
, eaddr
,
540 pte_addr
, pate
, &h_raddr
,
541 &h_prot
, &h_page_size
,
542 true, 5, guest_visible
);
547 if (!ppc_radix64_is_valid_level(level
++, *g_page_size
, nls
)) {
548 fault_cause
|= DSISR_R_BADCONFIG
;
551 ret
= ppc_radix64_next_level(cs
->as
, eaddr
& R_EADDR_MASK
,
552 &h_raddr
, &nls
, g_page_size
,
559 ppc_radix64_raise_si(cpu
, access_type
, eaddr
, fault_cause
);
564 } while (!(pte
& R_PTE_LEAF
));
566 rpn
= pte
& R_PTE_RPN
;
567 mask
= (1UL << *g_page_size
) - 1;
569 /* Or high bits of rpn and low bits to ea to form whole real addr */
570 *g_raddr
= (rpn
& ~mask
) | (eaddr
& mask
);
573 if (ppc_radix64_check_prot(cpu
, access_type
, pte
, &fault_cause
,
574 g_prot
, mmu_idx
, false)) {
575 /* Access denied due to protection */
577 ppc_radix64_raise_si(cpu
, access_type
, eaddr
, fault_cause
);
583 ppc_radix64_set_rc(cpu
, access_type
, pte
, pte_addr
, g_prot
);
590 * Radix tree translation is a 2 steps translation process:
592 * 1. Process-scoped translation: Guest Eff Addr -> Guest Real Addr
593 * 2. Partition-scoped translation: Guest Real Addr -> Host Real Addr
596 * +-------------+----------------+---------------+
597 * | | HV = 0 | HV = 1 |
598 * +-------------+----------------+---------------+
599 * | Relocation | Partition | No |
600 * | = Off | Scoped | Translation |
601 * Relocation +-------------+----------------+---------------+
602 * | Relocation | Partition & | Process |
603 * | = On | Process Scoped | Scoped |
604 * +-------------+----------------+---------------+
606 static bool ppc_radix64_xlate_impl(PowerPCCPU
*cpu
, vaddr eaddr
,
607 MMUAccessType access_type
, hwaddr
*raddr
,
608 int *psizep
, int *protp
, int mmu_idx
,
611 CPUPPCState
*env
= &cpu
->env
;
618 assert(!(mmuidx_hv(mmu_idx
) && cpu
->vhyp
));
620 relocation
= !mmuidx_real(mmu_idx
);
622 /* HV or virtual hypervisor Real Mode Access */
623 if (!relocation
&& (mmuidx_hv(mmu_idx
) || vhyp_flat_addressing(cpu
))) {
624 /* In real mode top 4 effective addr bits (mostly) ignored */
625 *raddr
= eaddr
& 0x0FFFFFFFFFFFFFFFULL
;
627 /* In HV mode, add HRMOR if top EA bit is clear */
628 if (mmuidx_hv(mmu_idx
) || !env
->has_hv_mode
) {
629 if (!(eaddr
>> 63)) {
630 *raddr
|= env
->spr
[SPR_HRMOR
];
633 *protp
= PAGE_READ
| PAGE_WRITE
| PAGE_EXEC
;
634 *psizep
= TARGET_PAGE_BITS
;
639 * Check UPRT (we avoid the check in real mode to deal with
640 * transitional states during kexec.
642 if (guest_visible
&& !ppc64_use_proc_tbl(cpu
)) {
643 qemu_log_mask(LOG_GUEST_ERROR
,
644 "LPCR:UPRT not set in radix mode ! LPCR="
645 TARGET_FMT_lx
"\n", env
->spr
[SPR_LPCR
]);
648 /* Virtual Mode Access - get the fully qualified address */
649 if (!ppc_radix64_get_fully_qualified_addr(&cpu
->env
, eaddr
, &lpid
, &pid
)) {
651 ppc_radix64_raise_segi(cpu
, access_type
, eaddr
);
656 /* Get Partition Table */
658 PPCVirtualHypervisorClass
*vhc
;
659 vhc
= PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu
->vhyp
);
660 if (!vhc
->get_pate(cpu
->vhyp
, cpu
, lpid
, &pate
)) {
662 ppc_radix64_raise_hsi(cpu
, access_type
, eaddr
, eaddr
,
668 if (!ppc64_v3_get_pate(cpu
, lpid
, &pate
)) {
670 ppc_radix64_raise_hsi(cpu
, access_type
, eaddr
, eaddr
,
675 if (!validate_pate(cpu
, lpid
, &pate
)) {
677 ppc_radix64_raise_hsi(cpu
, access_type
, eaddr
, eaddr
,
685 *protp
= PAGE_READ
| PAGE_WRITE
| PAGE_EXEC
;
688 * Perform process-scoped translation if relocation enabled.
690 * - Translates an effective address to a host real address in
691 * quadrants 0 and 3 when HV=1.
693 * - Translates an effective address to a guest real address.
696 int ret
= ppc_radix64_process_scoped_xlate(cpu
, access_type
, eaddr
, pid
,
697 pate
, &g_raddr
, &prot
,
698 &psize
, mmu_idx
, guest_visible
);
702 *psizep
= MIN(*psizep
, psize
);
705 g_raddr
= eaddr
& R_EADDR_MASK
;
708 if (vhyp_flat_addressing(cpu
)) {
712 * Perform partition-scoped translation if !HV or HV access to
713 * quadrants 1 or 2. Translates a guest real address to a host
716 if (lpid
|| !mmuidx_hv(mmu_idx
)) {
719 ret
= ppc_radix64_partition_scoped_xlate(cpu
, access_type
, eaddr
,
720 g_raddr
, pate
, raddr
,
721 &prot
, &psize
, false,
722 mmu_idx
, guest_visible
);
726 *psizep
= MIN(*psizep
, psize
);
736 bool ppc_radix64_xlate(PowerPCCPU
*cpu
, vaddr eaddr
, MMUAccessType access_type
,
737 hwaddr
*raddrp
, int *psizep
, int *protp
, int mmu_idx
,
740 bool ret
= ppc_radix64_xlate_impl(cpu
, eaddr
, access_type
, raddrp
,
741 psizep
, protp
, mmu_idx
, guest_visible
);
743 qemu_log_mask(CPU_LOG_MMU
, "%s for %s @0x%"VADDR_PRIx
744 " mmu_idx %u (prot %c%c%c) -> 0x%"HWADDR_PRIx
"\n",
745 __func__
, access_str(access_type
),
747 *protp
& PAGE_READ
? 'r' : '-',
748 *protp
& PAGE_WRITE
? 'w' : '-',
749 *protp
& PAGE_EXEC
? 'x' : '-',