// SPDX-License-Identifier: GPL-2.0
/*
 *  prepare to run common code
 *
 *  Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE
 */

#define DISABLE_BRANCH_PROFILING
#include <linux/init.h>
#include <linux/linkage.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/percpu.h>
#include <linux/start_kernel.h>
#include <linux/io.h>
#include <linux/memblock.h>
#include <linux/mem_encrypt.h>

#include <asm/processor.h>
#include <asm/proto.h>
#include <asm/smp.h>
#include <asm/setup.h>
#include <asm/desc.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
#include <asm/sections.h>
#include <asm/kdebug.h>
#include <asm/e820/api.h>
#include <asm/bios_ebda.h>
#include <asm/bootparam_utils.h>
#include <asm/microcode.h>
#include <asm/kasan.h>

/*
 * Manage page tables very early on.
 */
extern pmd_t early_dynamic_pgts[EARLY_DYNAMIC_PAGE_TABLES][PTRS_PER_PMD];
static unsigned int __initdata next_early_pgt;
pmdval_t early_pmd_flags = __PAGE_KERNEL_LARGE & ~(_PAGE_GLOBAL | _PAGE_NX);

#define __head __section(.head.text)

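/*
 * Translate the link-time virtual address of a global into the physical
 * address it currently occupies, based on the physical address the kernel
 * was actually loaded at.
 */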
static void __head *fixup_pointer(void *ptr, unsigned long physaddr)
{
	return ptr - (void *)_text + (void *)physaddr;
}

/* Code in __startup_64() can be relocated during execution, but the compiler
 * doesn't have to generate PC-relative relocations when accessing globals from
 * that function. Clang actually does not generate them, which leads to
 * boot-time crashes. To work around this problem, every global pointer must
 * be adjusted using fixup_pointer().
 */
unsigned long __head __startup_64(unsigned long physaddr,
				  struct boot_params *bp)
{
	unsigned long load_delta, *p;
	unsigned long pgtable_flags;
	pgdval_t *pgd;
	p4dval_t *p4d;
	pudval_t *pud;
	pmdval_t *pmd, pmd_entry;
	pteval_t *mask_ptr;
	int i;
	unsigned int *next_pgt_ptr;

	/* Is the address too large? */
	if (physaddr >> MAX_PHYSMEM_BITS)
		for (;;);

	/*
	 * Compute the delta between the address I am compiled to run at
	 * and the address I am actually running at.
	 */
	load_delta = physaddr - (unsigned long)(_text - __START_KERNEL_map);

	/* Is the address not 2M aligned? */
	if (load_delta & ~PMD_PAGE_MASK)
		for (;;);

	/* Activate Secure Memory Encryption (SME) if supported and enabled */
	sme_enable(bp);

	/* Include the SME encryption mask in the fixup value */
	load_delta += sme_get_me_mask();

	/* Fixup the physical addresses in the page table */

	pgd = fixup_pointer(&early_top_pgt, physaddr);
	pgd[pgd_index(__START_KERNEL_map)] += load_delta;

	if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
		p4d = fixup_pointer(&level4_kernel_pgt, physaddr);
		p4d[511] += load_delta;
	}

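	/*
	 * These static level3/level2 entries were built in head_64.S with
	 * the link-time physical addresses of the next-level tables, so
	 * they need the same load_delta adjustment.
	 */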
	pud = fixup_pointer(&level3_kernel_pgt, physaddr);
	pud[510] += load_delta;
	pud[511] += load_delta;

	pmd = fixup_pointer(level2_fixmap_pgt, physaddr);
	pmd[506] += load_delta;

	/*
	 * Set up the identity mapping for the switchover.  These
	 * entries should *NOT* have the global bit set!  This also
	 * creates a bunch of nonsense entries but that is fine --
	 * it avoids problems around wraparound.
	 */

	next_pgt_ptr = fixup_pointer(&next_early_pgt, physaddr);
	pud = fixup_pointer(early_dynamic_pgts[(*next_pgt_ptr)++], physaddr);
	pmd = fixup_pointer(early_dynamic_pgts[(*next_pgt_ptr)++], physaddr);

	pgtable_flags = _KERNPG_TABLE_NOENC + sme_get_me_mask();

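	/*
	 * Two consecutive entries are written at each level so that the
	 * identity mapping still covers the whole kernel image if it
	 * happens to straddle an entry boundary.
	 */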
	if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
		p4d = fixup_pointer(early_dynamic_pgts[next_early_pgt++], physaddr);

		i = (physaddr >> PGDIR_SHIFT) % PTRS_PER_PGD;
		pgd[i + 0] = (pgdval_t)p4d + pgtable_flags;
		pgd[i + 1] = (pgdval_t)p4d + pgtable_flags;

		i = (physaddr >> P4D_SHIFT) % PTRS_PER_P4D;
		p4d[i + 0] = (pgdval_t)pud + pgtable_flags;
		p4d[i + 1] = (pgdval_t)pud + pgtable_flags;
	} else {
		i = (physaddr >> PGDIR_SHIFT) % PTRS_PER_PGD;
		pgd[i + 0] = (pgdval_t)pud + pgtable_flags;
		pgd[i + 1] = (pgdval_t)pud + pgtable_flags;
	}

	i = (physaddr >> PUD_SHIFT) % PTRS_PER_PUD;
	pud[i + 0] = (pudval_t)pmd + pgtable_flags;
	pud[i + 1] = (pudval_t)pmd + pgtable_flags;

	pmd_entry = __PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL;
	/* Filter out unsupported __PAGE_KERNEL_* bits: */
	mask_ptr = fixup_pointer(&__supported_pte_mask, physaddr);
	pmd_entry &= *mask_ptr;
	pmd_entry += sme_get_me_mask();
	pmd_entry += physaddr;

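	/*
	 * Map the kernel image (_text .. _end) with 2MB pages at the
	 * physical address it was loaded at.
	 */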
	for (i = 0; i < DIV_ROUND_UP(_end - _text, PMD_SIZE); i++) {
		int idx = i + (physaddr >> PMD_SHIFT) % PTRS_PER_PMD;
		pmd[idx] = pmd_entry + i * PMD_SIZE;
	}

	/*
	 * Fixup the kernel text+data virtual addresses. Note that
	 * we might write invalid pmds, when the kernel is relocated
	 * cleanup_highmap() fixes this up along with the mappings
	 * beyond _end.
	 */

	pmd = fixup_pointer(level2_kernel_pgt, physaddr);
	for (i = 0; i < PTRS_PER_PMD; i++) {
		if (pmd[i] & _PAGE_PRESENT)
			pmd[i] += load_delta;
	}

	/*
	 * Fixup phys_base - remove the memory encryption mask to obtain
	 * the true physical address.
	 */
	p = fixup_pointer(&phys_base, physaddr);
	*p += load_delta - sme_get_me_mask();

	/* Encrypt the kernel and related (if SME is active) */
	sme_encrypt_kernel(bp);

	/*
	 * Return the SME encryption mask (if SME is active) to be used as a
	 * modifier for the initial pgdir entry programmed into CR3.
	 */
	return sme_get_me_mask();
}

unsigned long __startup_secondary_64(void)
{
	/*
	 * Return the SME encryption mask (if SME is active) to be used as a
	 * modifier for the initial pgdir entry programmed into CR3.
	 */
	return sme_get_me_mask();
}

/* Wipe all early page tables except for the kernel symbol map */
static void __init reset_early_page_tables(void)
{
	memset(early_top_pgt, 0, sizeof(pgd_t)*(PTRS_PER_PGD-1));
	next_early_pgt = 0;
	write_cr3(__sme_pa_nodebug(early_top_pgt));
}

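/*
 * Called from the early page-fault path: build any missing intermediate
 * page-table levels out of early_dynamic_pgts[] and install the given
 * PMD entry for 'address'.
 */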
/* Create a new PMD entry */
int __init __early_make_pgtable(unsigned long address, pmdval_t pmd)
{
	unsigned long physaddr = address - __PAGE_OFFSET;
	pgdval_t pgd, *pgd_p;
	p4dval_t p4d, *p4d_p;
	pudval_t pud, *pud_p;
	pmdval_t *pmd_p;

	/* Invalid address or early pgt is done ? */
	if (physaddr >= MAXMEM || read_cr3_pa() != __pa_nodebug(early_top_pgt))
		return -1;

again:
	pgd_p = &early_top_pgt[pgd_index(address)].pgd;
	pgd = *pgd_p;

	/*
	 * The use of __START_KERNEL_map rather than __PAGE_OFFSET here is
	 * critical -- __PAGE_OFFSET would point us back into the dynamic
	 * range and we might end up looping forever...
	 */
	if (!IS_ENABLED(CONFIG_X86_5LEVEL))
		p4d_p = pgd_p;
	else if (pgd)
		p4d_p = (p4dval_t *)((pgd & PTE_PFN_MASK) + __START_KERNEL_map - phys_base);
	else {
		if (next_early_pgt >= EARLY_DYNAMIC_PAGE_TABLES) {
			reset_early_page_tables();
			goto again;
		}

		p4d_p = (p4dval_t *)early_dynamic_pgts[next_early_pgt++];
		memset(p4d_p, 0, sizeof(*p4d_p) * PTRS_PER_P4D);
		*pgd_p = (pgdval_t)p4d_p - __START_KERNEL_map + phys_base + _KERNPG_TABLE;
	}
	p4d_p += p4d_index(address);
	p4d = *p4d_p;

	if (p4d)
		pud_p = (pudval_t *)((p4d & PTE_PFN_MASK) + __START_KERNEL_map - phys_base);
	else {
		if (next_early_pgt >= EARLY_DYNAMIC_PAGE_TABLES) {
			reset_early_page_tables();
			goto again;
		}

		pud_p = (pudval_t *)early_dynamic_pgts[next_early_pgt++];
		memset(pud_p, 0, sizeof(*pud_p) * PTRS_PER_PUD);
		*p4d_p = (p4dval_t)pud_p - __START_KERNEL_map + phys_base + _KERNPG_TABLE;
	}
	pud_p += pud_index(address);
	pud = *pud_p;

	if (pud)
		pmd_p = (pmdval_t *)((pud & PTE_PFN_MASK) + __START_KERNEL_map - phys_base);
	else {
		if (next_early_pgt >= EARLY_DYNAMIC_PAGE_TABLES) {
			reset_early_page_tables();
			goto again;
		}

		pmd_p = (pmdval_t *)early_dynamic_pgts[next_early_pgt++];
		memset(pmd_p, 0, sizeof(*pmd_p) * PTRS_PER_PMD);
		*pud_p = (pudval_t)pmd_p - __START_KERNEL_map + phys_base + _KERNPG_TABLE;
	}
	pmd_p[pmd_index(address)] = pmd;

	return 0;
}

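/* Map the 2MB page containing 'address' using the default early PMD flags. */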
int __init early_make_pgtable(unsigned long address)
{
	unsigned long physaddr = address - __PAGE_OFFSET;
	pmdval_t pmd;

	pmd = (physaddr & PMD_MASK) + early_pmd_flags;

	return __early_make_pgtable(address, pmd);
}

/*
 * Don't add a printk in there. printk relies on the PDA which is not
 * initialized yet.
 */
static void __init clear_bss(void)
{
	memset(__bss_start, 0,
	       (unsigned long) __bss_stop - (unsigned long) __bss_start);
}

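/*
 * The command line pointer is split across boot_params: the low 32 bits
 * live in the setup header, the high 32 bits in ext_cmd_line_ptr.
 */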
static unsigned long get_cmd_line_ptr(void)
{
	unsigned long cmd_line_ptr = boot_params.hdr.cmd_line_ptr;

	cmd_line_ptr |= (u64)boot_params.ext_cmd_line_ptr << 32;

	return cmd_line_ptr;
}

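/*
 * Copy boot_params and the kernel command line out of the real-mode data
 * into kernel variables; the original copy is not preserved.
 */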
static void __init copy_bootdata(char *real_mode_data)
{
	char * command_line;
	unsigned long cmd_line_ptr;

	/*
	 * If SME is active, this will create decrypted mappings of the
	 * boot data in advance of the copy operations.
	 */
	sme_map_bootdata(real_mode_data);

	memcpy(&boot_params, real_mode_data, sizeof boot_params);
	sanitize_boot_params(&boot_params);
	cmd_line_ptr = get_cmd_line_ptr();
	if (cmd_line_ptr) {
		command_line = __va(cmd_line_ptr);
		memcpy(boot_command_line, command_line, COMMAND_LINE_SIZE);
	}

	/*
	 * The old boot data is no longer needed and won't be reserved,
	 * freeing up that memory for use by the system. If SME is active,
	 * we need to remove the mappings that were created so that the
	 * memory doesn't remain mapped as decrypted.
	 */
	sme_unmap_bootdata(real_mode_data);
}

asmlinkage __visible void __init x86_64_start_kernel(char * real_mode_data)
{
	/*
	 * Build-time sanity checks on the kernel image and module
	 * area mappings. (these are purely build-time and produce no code)
	 */
	BUILD_BUG_ON(MODULES_VADDR < __START_KERNEL_map);
	BUILD_BUG_ON(MODULES_VADDR - __START_KERNEL_map < KERNEL_IMAGE_SIZE);
	BUILD_BUG_ON(MODULES_LEN + KERNEL_IMAGE_SIZE > 2*PUD_SIZE);
	BUILD_BUG_ON((__START_KERNEL_map & ~PMD_MASK) != 0);
	BUILD_BUG_ON((MODULES_VADDR & ~PMD_MASK) != 0);
	BUILD_BUG_ON(!(MODULES_VADDR > __START_KERNEL));
	BUILD_BUG_ON(!(((MODULES_END - 1) & PGDIR_MASK) ==
			(__START_KERNEL & PGDIR_MASK)));
	BUILD_BUG_ON(__fix_to_virt(__end_of_fixed_addresses) <= MODULES_END);

	cr4_init_shadow();

	/* Kill off the identity-map trampoline */
	reset_early_page_tables();

	clear_bss();

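	/*
	 * Start from a clean init_top_pgt; the kernel's high mapping is
	 * copied into it below.
	 */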
	clear_page(init_top_pgt);

	/*
	 * SME support may update early_pmd_flags to include the memory
	 * encryption mask, so it needs to be called before anything
	 * that may generate a page fault.
	 */
	sme_early_init();

	kasan_early_init();

	idt_setup_early_handler();

	copy_bootdata(__va(real_mode_data));

	/*
	 * Load microcode early on BSP.
	 */
	load_ucode_bsp();

	/* set init_top_pgt kernel high mapping */
	init_top_pgt[511] = early_top_pgt[511];

	x86_64_start_reservations(real_mode_data);
}

void __init x86_64_start_reservations(char *real_mode_data)
{
	/* version is non-zero only if the boot data has already been copied */
	if (!boot_params.hdr.version)
		copy_bootdata(__va(real_mode_data));

	x86_early_init_platform_quirks();

	switch (boot_params.hdr.hardware_subarch) {
	case X86_SUBARCH_INTEL_MID:
		x86_intel_mid_early_setup();
		break;
	default:
		break;
	}

	start_kernel();
}