/* SPDX-License-Identifier: GPL-2.0 */
#include <linux/jump_label.h>
#include <asm/unwind_hints.h>
#include <asm/cpufeatures.h>
#include <asm/page_types.h>
#include <asm/percpu.h>
#include <asm/asm-offsets.h>
#include <asm/processor-flags.h>

/*

 x86 function call convention, 64-bit:
 -------------------------------------
  arguments           |  callee-saved      | extra caller-saved | return
 [callee-clobbered]   |                    | [callee-clobbered] |
 ---------------------------------------------------------------------------
 rdi rsi rdx rcx r8-9 | rbx rbp [*] r12-15 | r10-11             | rax, rdx [**]

 ( rsp is obviously invariant across normal function calls. (gcc can 'merge'
   functions when it sees tail-call optimization possibilities) rflags is
   clobbered. Leftover arguments are passed over the stack frame.)

 [*]  In the frame-pointers case rbp is fixed to the stack frame.

 [**] for struct return values wider than 64 bits the return convention is a
      bit more complex: up to 128 bits width we return small structures
      straight in rax, rdx. For structures larger than that (3 words or
      larger) the caller puts a pointer to an on-stack return struct
      [allocated in the caller's stack frame] into the first argument - i.e.
      into rdi. All other arguments shift up by one in this case.
      Fortunately this case is rare in the kernel.

 For 32-bit we have the following conventions - kernel is built with
 -mregparm=3 and -freg-struct-return:

 x86 function calling convention, 32-bit:
 ----------------------------------------
  arguments         | callee-saved        | extra caller-saved | return
 [callee-clobbered] |                     | [callee-clobbered] |
 -------------------------------------------------------------------------
 eax edx ecx        | ebx edi esi ebp [*] | <none>             | eax, edx [**]

 ( here too esp is obviously invariant across normal function calls. eflags
   is clobbered. Leftover arguments are passed over the stack frame. )

 [*]  In the frame-pointers case ebp is fixed to the stack frame.

 [**] We build with -freg-struct-return, which on 32-bit means similar
      semantics as on 64-bit: edx can be used for a second return value
      (i.e. covering integer and structure sizes up to 64 bits) - after that
      it gets more complex and more expensive: 3-word or larger struct returns
      get done in the caller's frame and the pointer to the return struct goes
      into regparm0, i.e. eax - the other arguments shift up and the
      function's register parameters degenerate to regparm=2 in essence.

*/
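
/*
 * Illustrative sketch (not part of the original header; the function name is
 * hypothetical): for a 64-bit call such as
 *
 *	long copy_bytes(void *dst, const void *src, unsigned long len);
 *
 * the convention above places dst in %rdi, src in %rsi and len in %rdx, and
 * the result comes back in %rax. %rbx, %rbp and %r12-%r15 must look unchanged
 * to the caller, while %r10 and %r11 may be clobbered freely.
 */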

#ifdef CONFIG_X86_64

/*
 * 64-bit system call stack frame layout defines and helpers,
 * for assembly code:
 */

/* The layout forms the "struct pt_regs" on the stack: */
/*
 * C ABI says these regs are callee-preserved. They aren't saved on kernel entry
 * unless syscall needs a complete, fully filled "struct pt_regs".
 */
#define R15		0*8
#define R14		1*8
#define R13		2*8
#define R12		3*8
#define RBP		4*8
#define RBX		5*8
/* These regs are callee-clobbered. Always saved on kernel entry. */
#define R11		6*8
#define R10		7*8
#define R9		8*8
#define R8		9*8
#define RAX		10*8
#define RCX		11*8
#define RDX		12*8
#define RSI		13*8
#define RDI		14*8
/*
 * On syscall entry, this is syscall#. On CPU exception, this is error code.
 * On hw interrupt, it's IRQ number:
 */
#define ORIG_RAX	15*8
/* Return frame for iretq */
#define RIP		16*8
#define CS		17*8
#define EFLAGS		18*8
#define RSP		19*8
#define SS		20*8

#define SIZEOF_PTREGS	21*8
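
/*
 * Illustrative sketch (an assumption, not code from this file): once a full
 * pt_regs frame has been built and %rsp points at its base, the offsets above
 * index straight into the saved frame, e.g.:
 *
 *	movq	ORIG_RAX(%rsp), %rsi	# saved syscall nr / error code
 *	movq	RIP(%rsp), %rdi		# saved user return address
 */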

.macro PUSH_AND_CLEAR_REGS rdx=%rdx rax=%rax
	/*
	 * Push registers and sanitize registers of values that a
	 * speculation attack might otherwise want to exploit. The
	 * lower registers are likely clobbered well before they
	 * could be put to use in a speculative execution gadget.
	 * Interleave XOR with PUSH for better uop scheduling:
	 */
	pushq	%rdi		/* pt_regs->di */
	pushq	%rsi		/* pt_regs->si */
	pushq	\rdx		/* pt_regs->dx */
	pushq	%rcx		/* pt_regs->cx */
	pushq	\rax		/* pt_regs->ax */
	pushq	%r8		/* pt_regs->r8 */
	xorq	%r8, %r8	/* nospec r8 */
	pushq	%r9		/* pt_regs->r9 */
	xorq	%r9, %r9	/* nospec r9 */
	pushq	%r10		/* pt_regs->r10 */
	xorq	%r10, %r10	/* nospec r10 */
	pushq	%r11		/* pt_regs->r11 */
	xorq	%r11, %r11	/* nospec r11 */
	pushq	%rbx		/* pt_regs->rbx */
	xorl	%ebx, %ebx	/* nospec rbx */
	pushq	%rbp		/* pt_regs->rbp */
	xorl	%ebp, %ebp	/* nospec rbp */
	pushq	%r12		/* pt_regs->r12 */
	xorq	%r12, %r12	/* nospec r12 */
	pushq	%r13		/* pt_regs->r13 */
	xorq	%r13, %r13	/* nospec r13 */
	pushq	%r14		/* pt_regs->r14 */
	xorq	%r14, %r14	/* nospec r14 */
	pushq	%r15		/* pt_regs->r15 */
	xorq	%r15, %r15	/* nospec r15 */
	UNWIND_HINT_REGS
.endm
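
/*
 * Usage sketch (an assumption about a typical call site, kept as a comment):
 * the 64-bit syscall entry path can build pt_regs and pre-set the return
 * value slot in one go with e.g.
 *
 *	PUSH_AND_CLEAR_REGS rax=$-ENOSYS
 *
 * which pushes $-ENOSYS into the pt_regs->ax slot instead of the live %rax,
 * so an unhandled syscall number already has its error return in place.
 */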

.macro POP_REGS pop_rdi=1 skip_r11rcx=0
	popq %r15
	popq %r14
	popq %r13
	popq %r12
	popq %rbp
	popq %rbx
	.if \skip_r11rcx
	popq %rsi
	.else
	popq %r11
	.endif
	popq %r10
	popq %r9
	popq %r8
	popq %rax
	.if \skip_r11rcx
	popq %rsi
	.else
	popq %rcx
	.endif
	popq %rdx
	popq %rsi
	.if \pop_rdi
	popq %rdi
	.endif
.endm
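
/*
 * Note added for clarity (an assumption about the intended caller): the
 * skip_r11rcx=1 case fits a SYSRET exit path, where %rcx and %r11 are about
 * to be loaded with the user RIP and RFLAGS that SYSRET consumes, so their
 * saved pt_regs slots need not be restored. The two dummy "popq %rsi"
 * instructions merely step %rsp over the cx/r11 slots; %rsi still ends up
 * correct because its real slot is popped afterwards.
 */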

/*
 * This is a sneaky trick to help the unwinder find pt_regs on the stack. The
 * frame pointer is replaced with an encoded pointer to pt_regs. The encoding
 * is just setting the LSB, which makes it an invalid stack address and is also
 * a signal to the unwinder that it's a pt_regs pointer in disguise.
 *
 * NOTE: This macro must be used *after* PUSH_AND_CLEAR_REGS because it corrupts
 * the original rbp.
 */
.macro ENCODE_FRAME_POINTER ptregs_offset=0
#ifdef CONFIG_FRAME_POINTER
	.if \ptregs_offset
		leaq \ptregs_offset(%rsp), %rbp
	.else
		mov %rsp, %rbp
	.endif
	orq	$0x1, %rbp
#endif
.endm

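/*
 * Decoding sketch (my reading of the trick above, not text from this file):
 * a frame-pointer unwinder that sees an odd %rbp value can recover the
 * register frame with roughly
 *
 *	regs = (struct pt_regs *)(rbp & ~0x1);
 *
 * The ptregs_offset argument covers call sites where extra data (for example
 * a return address) still sits between %rsp and the start of pt_regs.
 */
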
#ifdef CONFIG_PAGE_TABLE_ISOLATION

/*
 * PAGE_TABLE_ISOLATION PGDs are 8k. Flip bit 12 to switch between the two
 * halves:
 */
#define PTI_USER_PGTABLE_BIT		PAGE_SHIFT
#define PTI_USER_PGTABLE_MASK		(1 << PTI_USER_PGTABLE_BIT)
#define PTI_USER_PCID_BIT		X86_CR3_PTI_PCID_USER_BIT
#define PTI_USER_PCID_MASK		(1 << PTI_USER_PCID_BIT)
#define PTI_USER_PGTABLE_AND_PCID_MASK	(PTI_USER_PCID_MASK | PTI_USER_PGTABLE_MASK)
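
/*
 * Worked example (derived from the definitions above, assuming 4K pages so
 * that PAGE_SHIFT == 12, and X86_CR3_PTI_PCID_USER_BIT == 11):
 *
 *	PTI_USER_PGTABLE_MASK          == 0x1000  (selects the user PGD half)
 *	PTI_USER_PCID_MASK             == 0x0800  (selects the user ASID)
 *	PTI_USER_PGTABLE_AND_PCID_MASK == 0x1800
 *
 * Since the two PGD halves sit 4k apart in one 8k allocation, toggling bit 12
 * of CR3 switches page tables without needing a second pointer.
 */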

.macro SET_NOFLUSH_BIT reg:req
	bts	$X86_CR3_PCID_NOFLUSH_BIT, \reg
.endm

.macro ADJUST_KERNEL_CR3 reg:req
	ALTERNATIVE "", "SET_NOFLUSH_BIT \reg", X86_FEATURE_PCID
	/* Clear PCID and "PAGE_TABLE_ISOLATION bit", point CR3 at kernel pagetables: */
	andq	$(~PTI_USER_PGTABLE_AND_PCID_MASK), \reg
.endm

.macro SWITCH_TO_KERNEL_CR3 scratch_reg:req
	ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI
	mov	%cr3, \scratch_reg
	ADJUST_KERNEL_CR3 \scratch_reg
	mov	\scratch_reg, %cr3
.Lend_\@:
.endm

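/*
 * Usage sketch (an assumption about a typical call site, not code from this
 * file): an entry path that has just saved a general-purpose register can
 * switch to the kernel page tables with e.g.
 *
 *	pushq	%rdi
 *	SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi
 *	popq	%rdi
 *
 * When X86_FEATURE_PTI is not set, the ALTERNATIVE above leaves the
 * "jmp .Lend_\@" in place, so the CR3 read/write is skipped entirely.
 */
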
#define THIS_CPU_user_pcid_flush_mask   \
	PER_CPU_VAR(cpu_tlbstate) + TLB_STATE_user_pcid_flush_mask

.macro SWITCH_TO_USER_CR3_NOSTACK scratch_reg:req scratch_reg2:req
	ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI
	mov	%cr3, \scratch_reg

	ALTERNATIVE "jmp .Lwrcr3_\@", "", X86_FEATURE_PCID

	/*
	 * Test if the ASID needs a flush.
	 */
	movq	\scratch_reg, \scratch_reg2
	andq	$(0x7FF), \scratch_reg		/* mask ASID */
	bt	\scratch_reg, THIS_CPU_user_pcid_flush_mask
	jnc	.Lnoflush_\@

	/* Flush needed, clear the bit */
	btr	\scratch_reg, THIS_CPU_user_pcid_flush_mask
	movq	\scratch_reg2, \scratch_reg
	jmp	.Lwrcr3_pcid_\@

.Lnoflush_\@:
	movq	\scratch_reg2, \scratch_reg
	SET_NOFLUSH_BIT \scratch_reg

.Lwrcr3_pcid_\@:
	/* Flip the ASID to the user version */
	orq	$(PTI_USER_PCID_MASK), \scratch_reg

.Lwrcr3_\@:
	/* Flip the PGD to the user version */
	orq	$(PTI_USER_PGTABLE_MASK), \scratch_reg
	mov	\scratch_reg, %cr3
.Lend_\@:
.endm

.macro SWITCH_TO_USER_CR3_STACK scratch_reg:req
	pushq	%rax
	SWITCH_TO_USER_CR3_NOSTACK scratch_reg=\scratch_reg scratch_reg2=%rax
	popq	%rax
.endm

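/*
 * Illustrative note (my summary plus an assumed call site, kept as a
 * comment): the ASID test above works because the kernel keeps its ASIDs in
 * CR3 bits 0-10 (hence the 0x7FF mask) and records pending user-ASID flushes
 * in the per-cpu cpu_tlbstate bitmap; only when the bit is clear may the CR3
 * write carry the NOFLUSH bit. A return-to-user path that still has %rdi
 * free might simply do
 *
 *	SWITCH_TO_USER_CR3_STACK scratch_reg=%rdi
 *
 * letting the macro borrow %rax as the second scratch register.
 */
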
.macro SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg:req save_reg:req
	ALTERNATIVE "jmp .Ldone_\@", "", X86_FEATURE_PTI
	movq	%cr3, \scratch_reg
	movq	\scratch_reg, \save_reg
	/*
	 * Test the user pagetable bit. If set, then the user page tables
	 * are active. If clear CR3 already has the kernel page table
	 * active.
	 */
	bt	$PTI_USER_PGTABLE_BIT, \scratch_reg
	jnc	.Ldone_\@

	ADJUST_KERNEL_CR3 \scratch_reg
	movq	\scratch_reg, %cr3

.Ldone_\@:
.endm

.macro RESTORE_CR3 scratch_reg:req save_reg:req
	ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI

	ALTERNATIVE "jmp .Lwrcr3_\@", "", X86_FEATURE_PCID

	/*
	 * KERNEL pages can always resume with NOFLUSH as we do
	 * explicit flushes.
	 */
	bt	$PTI_USER_PGTABLE_BIT, \save_reg
	jnc	.Lnoflush_\@

	/*
	 * Check if there's a pending flush for the user ASID we're
	 * about to set.
	 */
	movq	\save_reg, \scratch_reg
	andq	$(0x7FF), \scratch_reg
	bt	\scratch_reg, THIS_CPU_user_pcid_flush_mask
	jnc	.Lnoflush_\@

	btr	\scratch_reg, THIS_CPU_user_pcid_flush_mask
	jmp	.Lwrcr3_\@

.Lnoflush_\@:
	SET_NOFLUSH_BIT \save_reg

.Lwrcr3_\@:
	/*
	 * The CR3 write could be avoided when not changing its value,
	 * but would require a CR3 read *and* a scratch register.
	 */
	movq	\save_reg, %cr3
.Lend_\@:
.endm

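/*
 * Pairing sketch (assumed usage, modelled on a paranoid-style entry/exit
 * rather than copied from this file): an exception path that may interrupt
 * either user or kernel context saves the entry CR3 and puts it back on the
 * way out, e.g.
 *
 *	SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg=%rax save_reg=%r14
 *	... handle the exception on kernel page tables ...
 *	RESTORE_CR3 scratch_reg=%rbx save_reg=%r14
 *
 * save_reg must be a register the handler does not clobber, so the original
 * CR3 value survives until RESTORE_CR3 runs.
 */
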
#else /* CONFIG_PAGE_TABLE_ISOLATION=n: */

.macro SWITCH_TO_KERNEL_CR3 scratch_reg:req
.endm
.macro SWITCH_TO_USER_CR3_NOSTACK scratch_reg:req scratch_reg2:req
.endm
.macro SWITCH_TO_USER_CR3_STACK scratch_reg:req
.endm
.macro SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg:req save_reg:req
.endm
.macro RESTORE_CR3 scratch_reg:req save_reg:req
.endm

#endif

#endif /* CONFIG_X86_64 */

/*
 * This does 'call enter_from_user_mode' unless we can avoid it based on
 * kernel config or using the static jump infrastructure.
 */
.macro CALL_enter_from_user_mode
#ifdef CONFIG_CONTEXT_TRACKING
#ifdef HAVE_JUMP_LABEL
	STATIC_JUMP_IF_FALSE .Lafter_call_\@, context_tracking_enabled, def=0
#endif
	call enter_from_user_mode
.Lafter_call_\@:
#endif
.endm
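
/*
 * Expansion sketch (my reading of the macro above, not text from this file):
 * with CONFIG_CONTEXT_TRACKING=n the macro emits nothing at all; with context
 * tracking built in and jump labels available, the emitted code is roughly
 *
 *	STATIC_JUMP_IF_FALSE .Lafter_call_1, context_tracking_enabled, def=0
 *	call	enter_from_user_mode
 * .Lafter_call_1:
 *
 * i.e. a patchable jump that skips the call until the
 * context_tracking_enabled static key is switched on.
 */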