/*
 * AMD Memory Encryption Support
 *
 * Copyright (C) 2016 Advanced Micro Devices, Inc.
 *
 * Author: Tom Lendacky <thomas.lendacky@amd.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#define DISABLE_BRANCH_PROFILING

#include <linux/linkage.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/dma-mapping.h>
#include <linux/swiotlb.h>
#include <linux/mem_encrypt.h>

#include <asm/tlbflush.h>
#include <asm/fixmap.h>
#include <asm/setup.h>
#include <asm/bootparam.h>
#include <asm/set_memory.h>
#include <asm/cacheflush.h>
#include <asm/sections.h>
#include <asm/processor-flags.h>
#include <asm/msr.h>
#include <asm/cmdline.h>

static char sme_cmdline_arg[] __initdata = "mem_encrypt";
static char sme_cmdline_on[] __initdata = "on";
static char sme_cmdline_off[] __initdata = "off";

/*
 * Since SME related variables are set early in the boot process they must
 * reside in the .data section so as not to be zeroed out when the .bss
 * section is later cleared.
 */
u64 sme_me_mask __section(.data) = 0;
EXPORT_SYMBOL_GPL(sme_me_mask);

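/* Set early in boot to indicate that the kernel is running under SEV */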
static bool sev_enabled __section(.data);

/* Buffer used for early in-place encryption by BSP, no locking needed */
static char sme_early_buffer[PAGE_SIZE] __aligned(PAGE_SIZE);

/*
 * This routine does not change the underlying encryption setting of the
 * page(s) that map this memory. It assumes that eventually the memory is
 * meant to be accessed as either encrypted or decrypted but the contents
 * are currently not in the desired state.
 *
 * This routine follows the steps outlined in the AMD64 Architecture
 * Programmer's Manual Volume 2, Section 7.10.8 Encrypt-in-Place.
 */
static void __init __sme_early_enc_dec(resource_size_t paddr,
                                       unsigned long size, bool enc)
{
        void *src, *dst;
        size_t len;

        if (!sme_me_mask)
                return;

        local_flush_tlb();
        wbinvd();

        /*
         * There are a limited number of early mapping slots, so map (at
         * most) one page at a time.
         */
        while (size) {
                len = min_t(size_t, sizeof(sme_early_buffer), size);

                /*
                 * Create mappings for the current and desired format of
                 * the memory. Use a write-protected mapping for the source.
                 */
                src = enc ? early_memremap_decrypted_wp(paddr, len) :
                            early_memremap_encrypted_wp(paddr, len);

                dst = enc ? early_memremap_encrypted(paddr, len) :
                            early_memremap_decrypted(paddr, len);

                /*
                 * If a mapping can't be obtained to perform the operation,
                 * then eventual access of that area in the desired mode
                 * will cause a crash.
                 */
                BUG_ON(!src || !dst);

                /*
                 * Use a temporary buffer, of cache-line multiple size, to
                 * avoid data corruption as documented in the APM.
                 */
                memcpy(sme_early_buffer, src, len);
                memcpy(dst, sme_early_buffer, len);

                early_memunmap(dst, len);
                early_memunmap(src, len);

                paddr += len;
                size -= len;
        }
}

void __init sme_early_encrypt(resource_size_t paddr, unsigned long size)
{
        __sme_early_enc_dec(paddr, size, true);
}

void __init sme_early_decrypt(resource_size_t paddr, unsigned long size)
{
        __sme_early_enc_dec(paddr, size, false);
}

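/*
 * Map or unmap a range with 2MB PMD entries in the early boot pagetables.
 * The encryption mask is cleared from the PMD flags so that boot data
 * placed in memory by the (unencrypted) boot loader is accessed in the
 * clear while SME is active.
 */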
static void __init __sme_early_map_unmap_mem(void *vaddr, unsigned long size,
                                             bool map)
{
        unsigned long paddr = (unsigned long)vaddr - __PAGE_OFFSET;
        pmdval_t pmd_flags, pmd;

        /* Use early_pmd_flags but remove the encryption mask */
        pmd_flags = __sme_clr(early_pmd_flags);

        do {
                pmd = map ? (paddr & PMD_MASK) + pmd_flags : 0;
                __early_make_pgtable((unsigned long)vaddr, pmd);

                vaddr += PMD_SIZE;
                paddr += PMD_SIZE;
                size = (size <= PMD_SIZE) ? 0 : size - PMD_SIZE;
        } while (size);

        __native_flush_tlb();
}

void __init sme_unmap_bootdata(char *real_mode_data)
{
        struct boot_params *boot_data;
        unsigned long cmdline_paddr;

        if (!sme_active())
                return;

        /* Get the command line address before unmapping the real_mode_data */
        boot_data = (struct boot_params *)real_mode_data;
        cmdline_paddr = boot_data->hdr.cmd_line_ptr | ((u64)boot_data->ext_cmd_line_ptr << 32);

        __sme_early_map_unmap_mem(real_mode_data, sizeof(boot_params), false);

        if (!cmdline_paddr)
                return;

        __sme_early_map_unmap_mem(__va(cmdline_paddr), COMMAND_LINE_SIZE, false);
}

void __init sme_map_bootdata(char *real_mode_data)
{
        struct boot_params *boot_data;
        unsigned long cmdline_paddr;

        if (!sme_active())
                return;

        __sme_early_map_unmap_mem(real_mode_data, sizeof(boot_params), true);

        /* Get the command line address after mapping the real_mode_data */
        boot_data = (struct boot_params *)real_mode_data;
        cmdline_paddr = boot_data->hdr.cmd_line_ptr | ((u64)boot_data->ext_cmd_line_ptr << 32);

        if (!cmdline_paddr)
                return;

        __sme_early_map_unmap_mem(__va(cmdline_paddr), COMMAND_LINE_SIZE, true);
}

void __init sme_early_init(void)
{
        unsigned int i;

        if (!sme_me_mask)
                return;

        early_pmd_flags = __sme_set(early_pmd_flags);

        __supported_pte_mask = __sme_set(__supported_pte_mask);

        /* Update the protection map with memory encryption mask */
        for (i = 0; i < ARRAY_SIZE(protection_map); i++)
                protection_map[i] = pgprot_encrypted(protection_map[i]);
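
        /*
         * Under SEV, devices cannot DMA to/from memory encrypted with the
         * guest key, so DMA must go through decrypted bounce buffers;
         * force SWIOTLB on so those buffers are always available.
         */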
        if (sev_active())
                swiotlb_force = SWIOTLB_FORCE;
}

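/*
 * Allocate DMA coherent memory for an SEV guest. Try a direct page
 * allocation first, falling back to the SWIOTLB pool. Memory outside the
 * SWIOTLB area (which is already decrypted) must have its encryption bit
 * cleared before it is handed to the device.
 */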
static void *sev_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
                       gfp_t gfp, unsigned long attrs)
{
        unsigned long dma_mask;
        unsigned int order;
        struct page *page;
        void *vaddr = NULL;

        dma_mask = dma_alloc_coherent_mask(dev, gfp);
        order = get_order(size);

        /*
         * Memory will be memset to zero after marking decrypted, so don't
         * bother clearing it before.
         */
        gfp &= ~__GFP_ZERO;

        page = alloc_pages_node(dev_to_node(dev), gfp, order);
        if (page) {
                dma_addr_t addr;

                /*
                 * Since we will be clearing the encryption bit, check the
                 * mask with it already cleared.
                 */
                addr = __sme_clr(phys_to_dma(dev, page_to_phys(page)));
                if ((addr + size) > dma_mask) {
                        __free_pages(page, get_order(size));
                } else {
                        vaddr = page_address(page);
                        *dma_handle = addr;
                }
        }

        if (!vaddr)
                vaddr = swiotlb_alloc_coherent(dev, size, dma_handle, gfp);

        if (!vaddr)
                return NULL;

        /* Clear the SME encryption bit for DMA use if not swiotlb area */
        if (!is_swiotlb_buffer(dma_to_phys(dev, *dma_handle))) {
                set_memory_decrypted((unsigned long)vaddr, 1 << order);
                memset(vaddr, 0, PAGE_SIZE << order);
                *dma_handle = __sme_clr(*dma_handle);
        }

        return vaddr;
}

static void sev_free(struct device *dev, size_t size, void *vaddr,
                     dma_addr_t dma_handle, unsigned long attrs)
{
        /* Set the SME encryption bit for re-use if not swiotlb area */
        if (!is_swiotlb_buffer(dma_to_phys(dev, dma_handle)))
                set_memory_encrypted((unsigned long)vaddr,
                                     1 << get_order(size));

        swiotlb_free_coherent(dev, size, vaddr, dma_handle);
}

/*
 * SME and SEV are very similar but they are not the same, so there are
 * times that the kernel will need to distinguish between SME and SEV. The
 * sme_active() and sev_active() functions are used for this. When a
 * distinction isn't needed, the mem_encrypt_active() function can be used.
 *
 * The trampoline code is a good example for this requirement. Before
 * paging is activated, SME will access all memory as decrypted, but SEV
 * will access all memory as encrypted. So, when APs are being brought
 * up under SME the trampoline area cannot be encrypted, whereas under SEV
 * the trampoline area must be encrypted.
 */
bool sme_active(void)
{
        return sme_me_mask && !sev_enabled;
}
EXPORT_SYMBOL_GPL(sme_active);

bool sev_active(void)
{
        return sme_me_mask && sev_enabled;
}
EXPORT_SYMBOL_GPL(sev_active);

static const struct dma_map_ops sev_dma_ops = {
        .alloc                  = sev_alloc,
        .free                   = sev_free,
        .map_page               = swiotlb_map_page,
        .unmap_page             = swiotlb_unmap_page,
        .map_sg                 = swiotlb_map_sg_attrs,
        .unmap_sg               = swiotlb_unmap_sg_attrs,
        .sync_single_for_cpu    = swiotlb_sync_single_for_cpu,
        .sync_single_for_device = swiotlb_sync_single_for_device,
        .sync_sg_for_cpu        = swiotlb_sync_sg_for_cpu,
        .sync_sg_for_device     = swiotlb_sync_sg_for_device,
        .mapping_error          = swiotlb_dma_mapping_error,
};

/* Architecture __weak replacement functions */
void __init mem_encrypt_init(void)
{
        if (!sme_me_mask)
                return;

        /* Call into SWIOTLB to update the SWIOTLB DMA buffers */
        swiotlb_update_mem_attributes();

        /*
         * With SEV, DMA operations cannot use encryption. New DMA ops
         * are required in order to mark the DMA areas as decrypted or
         * to use bounce buffers.
         */
        if (sev_active())
                dma_ops = &sev_dma_ops;

        pr_info("AMD %s active\n",
                sev_active() ? "Secure Encrypted Virtualization (SEV)"
                             : "Secure Memory Encryption (SME)");
}

void swiotlb_set_mem_attributes(void *vaddr, unsigned long size)
{
        WARN(PAGE_ALIGN(size) != size,
             "size is not page-aligned (%#lx)\n", size);

        /* Make the SWIOTLB buffer area decrypted */
        set_memory_decrypted((unsigned long)vaddr, size >> PAGE_SHIFT);
}

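/*
 * Clear the PGD entries covering the given virtual address range. This
 * tears down an entire mapping in one step and is used to remove the
 * temporary decrypted kernel mapping once encryption is complete.
 */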
static void __init sme_clear_pgd(pgd_t *pgd_base, unsigned long start,
                                 unsigned long end)
{
        unsigned long pgd_start, pgd_end, pgd_size;
        pgd_t *pgd_p;

        pgd_start = start & PGDIR_MASK;
        pgd_end = end & PGDIR_MASK;

        pgd_size = (((pgd_end - pgd_start) / PGDIR_SIZE) + 1);
        pgd_size *= sizeof(pgd_t);

        pgd_p = pgd_base + pgd_index(start);

        memset(pgd_p, 0, pgd_size);
}

#define PGD_FLAGS       _KERNPG_TABLE_NOENC
#define P4D_FLAGS       _KERNPG_TABLE_NOENC
#define PUD_FLAGS       _KERNPG_TABLE_NOENC
#define PMD_FLAGS       (__PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL)

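/*
 * Populate a single 2MB PMD entry for vaddr in the pagetable hierarchy
 * rooted at pgd_base, allocating any missing intermediate P4D/PUD/PMD
 * tables from pgtable_area. Returns the updated pgtable_area allocation
 * pointer.
 */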
static void __init *sme_populate_pgd(pgd_t *pgd_base, void *pgtable_area,
                                     unsigned long vaddr, pmdval_t pmd_val)
{
        pgd_t *pgd_p;
        p4d_t *p4d_p;
        pud_t *pud_p;
        pmd_t *pmd_p;

        pgd_p = pgd_base + pgd_index(vaddr);
        if (native_pgd_val(*pgd_p)) {
                if (IS_ENABLED(CONFIG_X86_5LEVEL))
                        p4d_p = (p4d_t *)(native_pgd_val(*pgd_p) & ~PTE_FLAGS_MASK);
                else
                        pud_p = (pud_t *)(native_pgd_val(*pgd_p) & ~PTE_FLAGS_MASK);
        } else {
                pgd_t pgd;

                if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
                        p4d_p = pgtable_area;
                        memset(p4d_p, 0, sizeof(*p4d_p) * PTRS_PER_P4D);
                        pgtable_area += sizeof(*p4d_p) * PTRS_PER_P4D;

                        pgd = native_make_pgd((pgdval_t)p4d_p + PGD_FLAGS);
                } else {
                        pud_p = pgtable_area;
                        memset(pud_p, 0, sizeof(*pud_p) * PTRS_PER_PUD);
                        pgtable_area += sizeof(*pud_p) * PTRS_PER_PUD;

                        pgd = native_make_pgd((pgdval_t)pud_p + PGD_FLAGS);
                }
                native_set_pgd(pgd_p, pgd);
        }

        if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
                p4d_p += p4d_index(vaddr);
                if (native_p4d_val(*p4d_p)) {
                        pud_p = (pud_t *)(native_p4d_val(*p4d_p) & ~PTE_FLAGS_MASK);
                } else {
                        p4d_t p4d;

                        pud_p = pgtable_area;
                        memset(pud_p, 0, sizeof(*pud_p) * PTRS_PER_PUD);
                        pgtable_area += sizeof(*pud_p) * PTRS_PER_PUD;

                        p4d = native_make_p4d((pudval_t)pud_p + P4D_FLAGS);
                        native_set_p4d(p4d_p, p4d);
                }
        }

        pud_p += pud_index(vaddr);
        if (native_pud_val(*pud_p)) {
                if (native_pud_val(*pud_p) & _PAGE_PSE)
                        goto out;

                pmd_p = (pmd_t *)(native_pud_val(*pud_p) & ~PTE_FLAGS_MASK);
        } else {
                pud_t pud;

                pmd_p = pgtable_area;
                memset(pmd_p, 0, sizeof(*pmd_p) * PTRS_PER_PMD);
                pgtable_area += sizeof(*pmd_p) * PTRS_PER_PMD;

                pud = native_make_pud((pmdval_t)pmd_p + PUD_FLAGS);
                native_set_pud(pud_p, pud);
        }

        pmd_p += pmd_index(vaddr);
        if (!native_pmd_val(*pmd_p) || !(native_pmd_val(*pmd_p) & _PAGE_PSE))
                native_set_pmd(pmd_p, native_make_pmd(pmd_val));

out:
        return pgtable_area;
}

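/*
 * Return a conservative upper bound on the number of bytes of pagetable
 * structures needed to map len bytes using 2MB PMD entries.
 */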
static unsigned long __init sme_pgtable_calc(unsigned long len)
{
        unsigned long p4d_size, pud_size, pmd_size;
        unsigned long total;

        /*
         * Perform a relatively simplistic calculation of the pagetable
         * entries that are needed. The mappings will be covered by 2MB
         * PMD entries so we can conservatively calculate the required
         * number of P4D, PUD and PMD structures needed to perform the
         * mappings. Incrementing the count for each covers the case where
         * the addresses cross entries.
         */
        if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
                p4d_size = (ALIGN(len, PGDIR_SIZE) / PGDIR_SIZE) + 1;
                p4d_size *= sizeof(p4d_t) * PTRS_PER_P4D;
                pud_size = (ALIGN(len, P4D_SIZE) / P4D_SIZE) + 1;
                pud_size *= sizeof(pud_t) * PTRS_PER_PUD;
        } else {
                p4d_size = 0;
                pud_size = (ALIGN(len, PGDIR_SIZE) / PGDIR_SIZE) + 1;
                pud_size *= sizeof(pud_t) * PTRS_PER_PUD;
        }
        pmd_size = (ALIGN(len, PUD_SIZE) / PUD_SIZE) + 1;
        pmd_size *= sizeof(pmd_t) * PTRS_PER_PMD;

        total = p4d_size + pud_size + pmd_size;

        /*
         * Now calculate the added pagetable structures needed to populate
         * the new pagetables.
         */
        if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
                p4d_size = ALIGN(total, PGDIR_SIZE) / PGDIR_SIZE;
                p4d_size *= sizeof(p4d_t) * PTRS_PER_P4D;
                pud_size = ALIGN(total, P4D_SIZE) / P4D_SIZE;
                pud_size *= sizeof(pud_t) * PTRS_PER_PUD;
        } else {
                p4d_size = 0;
                pud_size = ALIGN(total, PGDIR_SIZE) / PGDIR_SIZE;
                pud_size *= sizeof(pud_t) * PTRS_PER_PUD;
        }
        pmd_size = ALIGN(total, PUD_SIZE) / PUD_SIZE;
        pmd_size *= sizeof(pmd_t) * PTRS_PER_PMD;

        total += p4d_size + pud_size + pmd_size;

        return total;
}

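/*
 * Encrypt the kernel in place. Two mappings of the kernel are created: the
 * normal encrypted (identity) mapping and a temporary decrypted,
 * write-protected mapping. sme_encrypt_execute() then runs from a workarea
 * placed after the kernel image and rewrites the kernel through an
 * intermediate copy buffer so that it lands back in memory encrypted.
 */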
void __init sme_encrypt_kernel(void)
{
        unsigned long workarea_start, workarea_end, workarea_len;
        unsigned long execute_start, execute_end, execute_len;
        unsigned long kernel_start, kernel_end, kernel_len;
        unsigned long pgtable_area_len;
        unsigned long paddr, pmd_flags;
        unsigned long decrypted_base;
        void *pgtable_area;
        pgd_t *pgd;

        if (!sme_active())
                return;

        /*
         * Prepare for encrypting the kernel by building new pagetables with
         * the necessary attributes needed to encrypt the kernel in place.
         *
         * One range of virtual addresses will map the memory occupied
         * by the kernel as encrypted.
         *
         * Another range of virtual addresses will map the memory occupied
         * by the kernel as decrypted and write-protected.
         *
         * The use of the write-protect attribute will prevent any of the
         * memory from being cached.
         */

        /* Physical addresses give us the identity-mapped virtual addresses */
        kernel_start = __pa_symbol(_text);
        kernel_end = ALIGN(__pa_symbol(_end), PMD_PAGE_SIZE);
        kernel_len = kernel_end - kernel_start;

        /* Set the encryption workarea to be immediately after the kernel */
        workarea_start = kernel_end;

        /*
         * Calculate required number of workarea bytes needed:
         *   executable encryption area size:
         *     stack page (PAGE_SIZE)
         *     encryption routine page (PAGE_SIZE)
         *     intermediate copy buffer (PMD_PAGE_SIZE)
         *   pagetable structures for the encryption of the kernel
         *   pagetable structures for workarea (in case not currently mapped)
         */
        execute_start = workarea_start;
        execute_end = execute_start + (PAGE_SIZE * 2) + PMD_PAGE_SIZE;
        execute_len = execute_end - execute_start;

        /*
         * One PGD for both encrypted and decrypted mappings and a set of
         * PUDs and PMDs for each of the encrypted and decrypted mappings.
         */
        pgtable_area_len = sizeof(pgd_t) * PTRS_PER_PGD;
        pgtable_area_len += sme_pgtable_calc(execute_end - kernel_start) * 2;

        /* PUDs and PMDs needed in the current pagetables for the workarea */
        pgtable_area_len += sme_pgtable_calc(execute_len + pgtable_area_len);

        /*
         * The total workarea includes the executable encryption area and
         * the pagetable area.
         */
        workarea_len = execute_len + pgtable_area_len;
        workarea_end = workarea_start + workarea_len;

        /*
         * Set the address to the start of where newly created pagetable
         * structures (PGDs, PUDs and PMDs) will be allocated. New pagetable
         * structures are created when the workarea is added to the current
         * pagetables and when the new encrypted and decrypted kernel
         * mappings are populated.
         */
        pgtable_area = (void *)execute_end;

        /*
         * Make sure the current pagetable structure has entries for
         * addressing the workarea.
         */
        pgd = (pgd_t *)native_read_cr3_pa();
        paddr = workarea_start;
        while (paddr < workarea_end) {
                pgtable_area = sme_populate_pgd(pgd, pgtable_area,
                                                paddr,
                                                paddr + PMD_FLAGS);

                paddr += PMD_PAGE_SIZE;
        }

        /* Flush the TLB - no globals so cr3 is enough */
        native_write_cr3(__native_read_cr3());

        /*
         * A new pagetable structure is being built to allow for the kernel
         * to be encrypted. It starts with an empty PGD that will then be
         * populated with new PUDs and PMDs as the encrypted and decrypted
         * kernel mappings are created.
         */
        pgd = pgtable_area;
        memset(pgd, 0, sizeof(*pgd) * PTRS_PER_PGD);
        pgtable_area += sizeof(*pgd) * PTRS_PER_PGD;

        /* Add encrypted kernel (identity) mappings */
        pmd_flags = PMD_FLAGS | _PAGE_ENC;
        paddr = kernel_start;
        while (paddr < kernel_end) {
                pgtable_area = sme_populate_pgd(pgd, pgtable_area,
                                                paddr,
                                                paddr + pmd_flags);

                paddr += PMD_PAGE_SIZE;
        }

        /*
         * A different PGD index/entry must be used to get different
         * pagetable entries for the decrypted mapping. Choose the next
         * PGD index and convert it to a virtual address to be used as
         * the base of the mapping.
         */
        decrypted_base = (pgd_index(workarea_end) + 1) & (PTRS_PER_PGD - 1);
        decrypted_base <<= PGDIR_SHIFT;

        /* Add decrypted, write-protected kernel (non-identity) mappings */
        pmd_flags = (PMD_FLAGS & ~_PAGE_CACHE_MASK) | (_PAGE_PAT | _PAGE_PWT);
        paddr = kernel_start;
        while (paddr < kernel_end) {
                pgtable_area = sme_populate_pgd(pgd, pgtable_area,
                                                paddr + decrypted_base,
                                                paddr + pmd_flags);

                paddr += PMD_PAGE_SIZE;
        }

        /* Add decrypted workarea mappings to both kernel mappings */
        paddr = workarea_start;
        while (paddr < workarea_end) {
                pgtable_area = sme_populate_pgd(pgd, pgtable_area,
                                                paddr,
                                                paddr + PMD_FLAGS);

                pgtable_area = sme_populate_pgd(pgd, pgtable_area,
                                                paddr + decrypted_base,
                                                paddr + PMD_FLAGS);

                paddr += PMD_PAGE_SIZE;
        }

        /* Perform the encryption */
        sme_encrypt_execute(kernel_start, kernel_start + decrypted_base,
                            kernel_len, workarea_start, (unsigned long)pgd);

        /*
         * At this point we are running encrypted. Remove the mappings for
         * the decrypted areas - all that is needed for this is to remove
         * the PGD entry/entries.
         */
        sme_clear_pgd(pgd, kernel_start + decrypted_base,
                      kernel_end + decrypted_base);

        sme_clear_pgd(pgd, workarea_start + decrypted_base,
                      workarea_end + decrypted_base);

        /* Flush the TLB - no globals so cr3 is enough */
        native_write_cr3(__native_read_cr3());
}

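/*
 * Runs very early in boot, identity mapped and before .bss is cleared, to
 * detect SME support via CPUID and the SYSCFG MSR and to set sme_me_mask
 * according to the mem_encrypt= command line option.
 */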
void __init __nostackprotector sme_enable(struct boot_params *bp)
{
        const char *cmdline_ptr, *cmdline_arg, *cmdline_on, *cmdline_off;
        unsigned int eax, ebx, ecx, edx;
        bool active_by_default;
        unsigned long me_mask;
        char buffer[16];
        u64 msr;

        /* Check for the SME support leaf */
        eax = 0x80000000;
        ecx = 0;
        native_cpuid(&eax, &ebx, &ecx, &edx);
        if (eax < 0x8000001f)
                return;

        /*
         * Check for the SME feature:
         *   CPUID Fn8000_001F[EAX] - Bit 0
         *     Secure Memory Encryption support
         *   CPUID Fn8000_001F[EBX] - Bits 5:0
         *     Pagetable bit position used to indicate encryption
         */
        eax = 0x8000001f;
        ecx = 0;
        native_cpuid(&eax, &ebx, &ecx, &edx);
        if (!(eax & 1))
                return;

        me_mask = 1UL << (ebx & 0x3f);

        /* Check if SME is enabled */
        msr = __rdmsr(MSR_K8_SYSCFG);
        if (!(msr & MSR_K8_SYSCFG_MEM_ENCRYPT))
                return;

        /*
         * Fixups have not been applied to phys_base yet and we're running
         * identity mapped, so we must obtain the address to the SME command
         * line argument data using rip-relative addressing.
         */
        asm ("lea sme_cmdline_arg(%%rip), %0"
             : "=r" (cmdline_arg)
             : "p" (sme_cmdline_arg));
        asm ("lea sme_cmdline_on(%%rip), %0"
             : "=r" (cmdline_on)
             : "p" (sme_cmdline_on));
        asm ("lea sme_cmdline_off(%%rip), %0"
             : "=r" (cmdline_off)
             : "p" (sme_cmdline_off));

        if (IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT))
                active_by_default = true;
        else
                active_by_default = false;

        cmdline_ptr = (const char *)((u64)bp->hdr.cmd_line_ptr |
                                     ((u64)bp->ext_cmd_line_ptr << 32));

        cmdline_find_option(cmdline_ptr, cmdline_arg, buffer, sizeof(buffer));

        if (!strncmp(buffer, cmdline_on, sizeof(buffer)))
                sme_me_mask = me_mask;
        else if (!strncmp(buffer, cmdline_off, sizeof(buffer)))
                sme_me_mask = 0;
        else
                sme_me_mask = active_by_default ? me_mask : 0;
}