]>
Commit | Line | Data |
---|---|---|
26d7f65f MF |
1 | #define pr_fmt(fmt) "efi: " fmt |
2 | ||
eeb9db09 ST |
3 | #include <linux/init.h> |
4 | #include <linux/kernel.h> | |
5 | #include <linux/string.h> | |
6 | #include <linux/time.h> | |
7 | #include <linux/types.h> | |
8 | #include <linux/efi.h> | |
9 | #include <linux/slab.h> | |
10 | #include <linux/memblock.h> | |
11 | #include <linux/bootmem.h> | |
44be28e9 | 12 | #include <linux/acpi.h> |
d394f2d9 | 13 | #include <linux/dmi.h> |
eeb9db09 ST |
14 | #include <asm/efi.h> |
15 | #include <asm/uv/uv.h> | |
16 | ||
17 | #define EFI_MIN_RESERVE 5120 | |
18 | ||
19 | #define EFI_DUMMY_GUID \ | |
20 | EFI_GUID(0x4424ac57, 0xbe4b, 0x47dd, 0x9e, 0x97, 0xed, 0x50, 0xf0, 0x9f, 0x92, 0xa9) | |
21 | ||
22 | static efi_char16_t efi_dummy_name[6] = { 'D', 'U', 'M', 'M', 'Y', 0 }; | |
23 | ||
24 | static bool efi_no_storage_paranoia; | |
25 | ||
26 | /* | |
27 | * Some firmware implementations refuse to boot if there's insufficient | |
28 | * space in the variable store. The implementation of garbage collection | |
29 | * in some FW versions causes stale (deleted) variables to take up space | |
30 | * longer than intended and space is only freed once the store becomes | |
31 | * almost completely full. | |
32 | * | |
33 | * Enabling this option disables the space checks in | |
34 | * efi_query_variable_store() and forces garbage collection. | |
35 | * | |
36 | * Only enable this option if deleting EFI variables does not free up | |
37 | * space in your variable store, e.g. if despite deleting variables | |
38 | * you're unable to create new ones. | |
39 | */ | |
40 | static int __init setup_storage_paranoia(char *arg) | |
41 | { | |
42 | efi_no_storage_paranoia = true; | |
43 | return 0; | |
44 | } | |
45 | early_param("efi_no_storage_paranoia", setup_storage_paranoia); | |
46 | ||
47 | /* | |
48 | * Deleting the dummy variable which kicks off garbage collection | |
49 | */ | |
50 | void efi_delete_dummy_variable(void) | |
51 | { | |
52 | efi.set_variable(efi_dummy_name, &EFI_DUMMY_GUID, | |
53 | EFI_VARIABLE_NON_VOLATILE | | |
54 | EFI_VARIABLE_BOOTSERVICE_ACCESS | | |
55 | EFI_VARIABLE_RUNTIME_ACCESS, | |
56 | 0, NULL); | |
57 | } | |
58 | ||
ca0e30dc AB |
59 | /* |
60 | * In the nonblocking case we do not attempt to perform garbage | |
61 | * collection if we do not have enough free space. Rather, we do the | |
62 | * bare minimum check and give up immediately if the available space | |
63 | * is below EFI_MIN_RESERVE. | |
64 | * | |
65 | * This function is intended to be small and simple because it is | |
66 | * invoked from crash handler paths. | |
67 | */ | |
68 | static efi_status_t | |
69 | query_variable_store_nonblocking(u32 attributes, unsigned long size) | |
70 | { | |
71 | efi_status_t status; | |
72 | u64 storage_size, remaining_size, max_size; | |
73 | ||
74 | status = efi.query_variable_info_nonblocking(attributes, &storage_size, | |
75 | &remaining_size, | |
76 | &max_size); | |
77 | if (status != EFI_SUCCESS) | |
78 | return status; | |
79 | ||
80 | if (remaining_size - size < EFI_MIN_RESERVE) | |
81 | return EFI_OUT_OF_RESOURCES; | |
82 | ||
83 | return EFI_SUCCESS; | |
84 | } | |
85 | ||
eeb9db09 ST |
86 | /* |
87 | * Some firmware implementations refuse to boot if there's insufficient space | |
88 | * in the variable store. Ensure that we never use more than a safe limit. | |
89 | * | |
90 | * Return EFI_SUCCESS if it is safe to write 'size' bytes to the variable | |
91 | * store. | |
92 | */ | |
ca0e30dc AB |
93 | efi_status_t efi_query_variable_store(u32 attributes, unsigned long size, |
94 | bool nonblocking) | |
eeb9db09 ST |
95 | { |
96 | efi_status_t status; | |
97 | u64 storage_size, remaining_size, max_size; | |
98 | ||
99 | if (!(attributes & EFI_VARIABLE_NON_VOLATILE)) | |
100 | return 0; | |
101 | ||
ca0e30dc AB |
102 | if (nonblocking) |
103 | return query_variable_store_nonblocking(attributes, size); | |
104 | ||
eeb9db09 ST |
105 | status = efi.query_variable_info(attributes, &storage_size, |
106 | &remaining_size, &max_size); | |
107 | if (status != EFI_SUCCESS) | |
108 | return status; | |
109 | ||
110 | /* | |
111 | * We account for that by refusing the write if permitting it would | |
112 | * reduce the available space to under 5KB. This figure was provided by | |
113 | * Samsung, so should be safe. | |
114 | */ | |
115 | if ((remaining_size - size < EFI_MIN_RESERVE) && | |
116 | !efi_no_storage_paranoia) { | |
117 | ||
118 | /* | |
119 | * Triggering garbage collection may require that the firmware | |
120 | * generate a real EFI_OUT_OF_RESOURCES error. We can force | |
121 | * that by attempting to use more space than is available. | |
122 | */ | |
123 | unsigned long dummy_size = remaining_size + 1024; | |
124 | void *dummy = kzalloc(dummy_size, GFP_ATOMIC); | |
125 | ||
126 | if (!dummy) | |
127 | return EFI_OUT_OF_RESOURCES; | |
128 | ||
129 | status = efi.set_variable(efi_dummy_name, &EFI_DUMMY_GUID, | |
130 | EFI_VARIABLE_NON_VOLATILE | | |
131 | EFI_VARIABLE_BOOTSERVICE_ACCESS | | |
132 | EFI_VARIABLE_RUNTIME_ACCESS, | |
133 | dummy_size, dummy); | |
134 | ||
135 | if (status == EFI_SUCCESS) { | |
136 | /* | |
137 | * This should have failed, so if it didn't make sure | |
138 | * that we delete it... | |
139 | */ | |
140 | efi_delete_dummy_variable(); | |
141 | } | |
142 | ||
143 | kfree(dummy); | |
144 | ||
145 | /* | |
146 | * The runtime code may now have triggered a garbage collection | |
147 | * run, so check the variable info again | |
148 | */ | |
149 | status = efi.query_variable_info(attributes, &storage_size, | |
150 | &remaining_size, &max_size); | |
151 | ||
152 | if (status != EFI_SUCCESS) | |
153 | return status; | |
154 | ||
155 | /* | |
156 | * There still isn't enough room, so return an error | |
157 | */ | |
158 | if (remaining_size - size < EFI_MIN_RESERVE) | |
159 | return EFI_OUT_OF_RESOURCES; | |
160 | } | |
161 | ||
162 | return EFI_SUCCESS; | |
163 | } | |
164 | EXPORT_SYMBOL_GPL(efi_query_variable_store); | |
165 | ||
816e7612 MF |
166 | /* |
167 | * The UEFI specification makes it clear that the operating system is | |
168 | * free to do whatever it wants with boot services code after | |
169 | * ExitBootServices() has been called. Ignoring this recommendation a | |
170 | * significant bunch of EFI implementations continue calling into boot | |
171 | * services code (SetVirtualAddressMap). In order to work around such | |
172 | * buggy implementations we reserve boot services region during EFI | |
173 | * init and make sure it stays executable. Then, after | |
174 | * SetVirtualAddressMap(), it is discarded. | |
175 | * | |
176 | * However, some boot services regions contain data that is required | |
177 | * by drivers, so we need to track which memory ranges can never be | |
178 | * freed. This is done by tagging those regions with the | |
179 | * EFI_MEMORY_RUNTIME attribute. | |
180 | * | |
181 | * Any driver that wants to mark a region as reserved must use | |
182 | * efi_mem_reserve() which will insert a new EFI memory descriptor | |
183 | * into efi.memmap (splitting existing regions if necessary) and tag | |
184 | * it with EFI_MEMORY_RUNTIME. | |
185 | */ | |
186 | void __init efi_arch_mem_reserve(phys_addr_t addr, u64 size) | |
187 | { | |
188 | phys_addr_t new_phys, new_size; | |
189 | struct efi_mem_range mr; | |
190 | efi_memory_desc_t md; | |
191 | int num_entries; | |
192 | void *new; | |
193 | ||
194 | if (efi_mem_desc_lookup(addr, &md)) { | |
195 | pr_err("Failed to lookup EFI memory descriptor for %pa\n", &addr); | |
196 | return; | |
197 | } | |
198 | ||
199 | if (addr + size > md.phys_addr + (md.num_pages << EFI_PAGE_SHIFT)) { | |
200 | pr_err("Region spans EFI memory descriptors, %pa\n", &addr); | |
201 | return; | |
202 | } | |
203 | ||
92dc3350 MF |
204 | size += addr % EFI_PAGE_SIZE; |
205 | size = round_up(size, EFI_PAGE_SIZE); | |
206 | addr = round_down(addr, EFI_PAGE_SIZE); | |
207 | ||
816e7612 | 208 | mr.range.start = addr; |
92dc3350 | 209 | mr.range.end = addr + size - 1; |
816e7612 MF |
210 | mr.attribute = md.attribute | EFI_MEMORY_RUNTIME; |
211 | ||
212 | num_entries = efi_memmap_split_count(&md, &mr.range); | |
213 | num_entries += efi.memmap.nr_map; | |
214 | ||
215 | new_size = efi.memmap.desc_size * num_entries; | |
216 | ||
20b1e22d | 217 | new_phys = efi_memmap_alloc(num_entries); |
816e7612 MF |
218 | if (!new_phys) { |
219 | pr_err("Could not allocate boot services memmap\n"); | |
220 | return; | |
221 | } | |
222 | ||
223 | new = early_memremap(new_phys, new_size); | |
224 | if (!new) { | |
225 | pr_err("Failed to map new boot services memmap\n"); | |
226 | return; | |
227 | } | |
228 | ||
229 | efi_memmap_insert(&efi.memmap, new, &mr); | |
230 | early_memunmap(new, new_size); | |
231 | ||
232 | efi_memmap_install(new_phys, num_entries); | |
233 | } | |
234 | ||
452308de MF |
235 | /* |
236 | * Helper function for efi_reserve_boot_services() to figure out if we | |
237 | * can free regions in efi_free_boot_services(). | |
238 | * | |
239 | * Use this function to ensure we do not free regions owned by somebody | |
240 | * else. We must only reserve (and then free) regions: | |
241 | * | |
242 | * - Not within any part of the kernel | |
243 | * - Not the BIOS reserved area (E820_RESERVED, E820_NVS, etc) | |
244 | */ | |
245 | static bool can_free_region(u64 start, u64 size) | |
246 | { | |
247 | if (start + size > __pa_symbol(_text) && start <= __pa_symbol(_end)) | |
248 | return false; | |
249 | ||
250 | if (!e820_all_mapped(start, start+size, E820_RAM)) | |
251 | return false; | |
252 | ||
253 | return true; | |
254 | } | |
255 | ||
eeb9db09 ST |
256 | void __init efi_reserve_boot_services(void) |
257 | { | |
78ce248f | 258 | efi_memory_desc_t *md; |
eeb9db09 | 259 | |
78ce248f | 260 | for_each_efi_memory_desc(md) { |
eeb9db09 ST |
261 | u64 start = md->phys_addr; |
262 | u64 size = md->num_pages << EFI_PAGE_SHIFT; | |
452308de | 263 | bool already_reserved; |
eeb9db09 ST |
264 | |
265 | if (md->type != EFI_BOOT_SERVICES_CODE && | |
266 | md->type != EFI_BOOT_SERVICES_DATA) | |
267 | continue; | |
452308de MF |
268 | |
269 | already_reserved = memblock_is_region_reserved(start, size); | |
270 | ||
271 | /* | |
272 | * Because the following memblock_reserve() is paired | |
273 | * with free_bootmem_late() for this region in | |
274 | * efi_free_boot_services(), we must be extremely | |
275 | * careful not to reserve, and subsequently free, | |
276 | * critical regions of memory (like the kernel image) or | |
277 | * those regions that somebody else has already | |
278 | * reserved. | |
279 | * | |
280 | * A good example of a critical region that must not be | |
281 | * freed is page zero (first 4Kb of memory), which may | |
282 | * contain boot services code/data but is marked | |
283 | * E820_RESERVED by trim_bios_range(). | |
284 | */ | |
285 | if (!already_reserved) { | |
eeb9db09 | 286 | memblock_reserve(start, size); |
452308de MF |
287 | |
288 | /* | |
289 | * If we are the first to reserve the region, no | |
290 | * one else cares about it. We own it and can | |
291 | * free it later. | |
292 | */ | |
293 | if (can_free_region(start, size)) | |
294 | continue; | |
295 | } | |
296 | ||
297 | /* | |
298 | * We don't own the region. We must not free it. | |
299 | * | |
300 | * Setting this bit for a boot services region really | |
301 | * doesn't make sense as far as the firmware is | |
302 | * concerned, but it does provide us with a way to tag | |
303 | * those regions that must not be paired with | |
304 | * free_bootmem_late(). | |
305 | */ | |
306 | md->attribute |= EFI_MEMORY_RUNTIME; | |
eeb9db09 ST |
307 | } |
308 | } | |
309 | ||
310 | void __init efi_free_boot_services(void) | |
311 | { | |
816e7612 | 312 | phys_addr_t new_phys, new_size; |
78ce248f | 313 | efi_memory_desc_t *md; |
816e7612 MF |
314 | int num_entries = 0; |
315 | void *new, *new_md; | |
eeb9db09 | 316 | |
78ce248f | 317 | for_each_efi_memory_desc(md) { |
eeb9db09 ST |
318 | unsigned long long start = md->phys_addr; |
319 | unsigned long long size = md->num_pages << EFI_PAGE_SHIFT; | |
5bc653b7 | 320 | size_t rm_size; |
eeb9db09 ST |
321 | |
322 | if (md->type != EFI_BOOT_SERVICES_CODE && | |
816e7612 MF |
323 | md->type != EFI_BOOT_SERVICES_DATA) { |
324 | num_entries++; | |
eeb9db09 | 325 | continue; |
816e7612 | 326 | } |
eeb9db09 | 327 | |
452308de | 328 | /* Do not free, someone else owns it: */ |
816e7612 MF |
329 | if (md->attribute & EFI_MEMORY_RUNTIME) { |
330 | num_entries++; | |
eeb9db09 | 331 | continue; |
816e7612 | 332 | } |
eeb9db09 | 333 | |
5bc653b7 AL |
334 | /* |
335 | * Nasty quirk: if all sub-1MB memory is used for boot | |
336 | * services, we can get here without having allocated the | |
337 | * real mode trampoline. It's too late to hand boot services | |
338 | * memory back to the memblock allocator, so instead | |
339 | * try to manually allocate the trampoline if needed. | |
340 | * | |
341 | * I've seen this on a Dell XPS 13 9350 with firmware | |
342 | * 1.4.4 with SGX enabled booting Linux via Fedora 24's | |
343 | * grub2-efi on a hard disk. (And no, I don't know why | |
344 | * this happened, but Linux should still try to boot rather | |
345 | * panicing early.) | |
346 | */ | |
347 | rm_size = real_mode_size_needed(); | |
348 | if (rm_size && (start + rm_size) < (1<<20) && size >= rm_size) { | |
349 | set_real_mode_mem(start, rm_size); | |
350 | start += rm_size; | |
351 | size -= rm_size; | |
352 | } | |
353 | ||
eeb9db09 ST |
354 | free_bootmem_late(start, size); |
355 | } | |
816e7612 MF |
356 | |
357 | new_size = efi.memmap.desc_size * num_entries; | |
20b1e22d | 358 | new_phys = efi_memmap_alloc(num_entries); |
816e7612 MF |
359 | if (!new_phys) { |
360 | pr_err("Failed to allocate new EFI memmap\n"); | |
361 | return; | |
362 | } | |
363 | ||
364 | new = memremap(new_phys, new_size, MEMREMAP_WB); | |
365 | if (!new) { | |
366 | pr_err("Failed to map new EFI memmap\n"); | |
367 | return; | |
368 | } | |
369 | ||
370 | /* | |
371 | * Build a new EFI memmap that excludes any boot services | |
372 | * regions that are not tagged EFI_MEMORY_RUNTIME, since those | |
373 | * regions have now been freed. | |
374 | */ | |
375 | new_md = new; | |
376 | for_each_efi_memory_desc(md) { | |
377 | if (!(md->attribute & EFI_MEMORY_RUNTIME) && | |
378 | (md->type == EFI_BOOT_SERVICES_CODE || | |
379 | md->type == EFI_BOOT_SERVICES_DATA)) | |
380 | continue; | |
381 | ||
382 | memcpy(new_md, md, efi.memmap.desc_size); | |
383 | new_md += efi.memmap.desc_size; | |
384 | } | |
385 | ||
386 | memunmap(new); | |
387 | ||
388 | if (efi_memmap_install(new_phys, num_entries)) { | |
389 | pr_err("Could not install new EFI memmap\n"); | |
390 | return; | |
391 | } | |
eeb9db09 ST |
392 | } |
393 | ||
394 | /* | |
395 | * A number of config table entries get remapped to virtual addresses | |
396 | * after entering EFI virtual mode. However, the kexec kernel requires | |
397 | * their physical addresses therefore we pass them via setup_data and | |
398 | * correct those entries to their respective physical addresses here. | |
399 | * | |
400 | * Currently only handles smbios which is necessary for some firmware | |
401 | * implementation. | |
402 | */ | |
403 | int __init efi_reuse_config(u64 tables, int nr_tables) | |
404 | { | |
405 | int i, sz, ret = 0; | |
406 | void *p, *tablep; | |
407 | struct efi_setup_data *data; | |
408 | ||
409 | if (!efi_setup) | |
410 | return 0; | |
411 | ||
412 | if (!efi_enabled(EFI_64BIT)) | |
413 | return 0; | |
414 | ||
415 | data = early_memremap(efi_setup, sizeof(*data)); | |
416 | if (!data) { | |
417 | ret = -ENOMEM; | |
418 | goto out; | |
419 | } | |
420 | ||
421 | if (!data->smbios) | |
422 | goto out_memremap; | |
423 | ||
424 | sz = sizeof(efi_config_table_64_t); | |
425 | ||
426 | p = tablep = early_memremap(tables, nr_tables * sz); | |
427 | if (!p) { | |
428 | pr_err("Could not map Configuration table!\n"); | |
429 | ret = -ENOMEM; | |
430 | goto out_memremap; | |
431 | } | |
432 | ||
433 | for (i = 0; i < efi.systab->nr_tables; i++) { | |
434 | efi_guid_t guid; | |
435 | ||
436 | guid = ((efi_config_table_64_t *)p)->guid; | |
437 | ||
438 | if (!efi_guidcmp(guid, SMBIOS_TABLE_GUID)) | |
439 | ((efi_config_table_64_t *)p)->table = data->smbios; | |
440 | p += sz; | |
441 | } | |
98a716b6 | 442 | early_memunmap(tablep, nr_tables * sz); |
eeb9db09 ST |
443 | |
444 | out_memremap: | |
98a716b6 | 445 | early_memunmap(data, sizeof(*data)); |
eeb9db09 ST |
446 | out: |
447 | return ret; | |
448 | } | |
449 | ||
d394f2d9 AT |
450 | static const struct dmi_system_id sgi_uv1_dmi[] = { |
451 | { NULL, "SGI UV1", | |
452 | { DMI_MATCH(DMI_PRODUCT_NAME, "Stoutland Platform"), | |
453 | DMI_MATCH(DMI_PRODUCT_VERSION, "1.0"), | |
454 | DMI_MATCH(DMI_BIOS_VENDOR, "SGI.COM"), | |
455 | } | |
456 | }, | |
457 | { } /* NULL entry stops DMI scanning */ | |
458 | }; | |
459 | ||
eeb9db09 ST |
460 | void __init efi_apply_memmap_quirks(void) |
461 | { | |
462 | /* | |
463 | * Once setup is done earlier, unmap the EFI memory map on mismatched | |
464 | * firmware/kernel architectures since there is no support for runtime | |
465 | * services. | |
466 | */ | |
467 | if (!efi_runtime_supported()) { | |
26d7f65f | 468 | pr_info("Setup done, disabling due to 32/64-bit mismatch\n"); |
9479c7ce | 469 | efi_memmap_unmap(); |
eeb9db09 ST |
470 | } |
471 | ||
d394f2d9 AT |
472 | /* UV2+ BIOS has a fix for this issue. UV1 still needs the quirk. */ |
473 | if (dmi_check_system(sgi_uv1_dmi)) | |
eeb9db09 ST |
474 | set_bit(EFI_OLD_MEMMAP, &efi.flags); |
475 | } | |
44be28e9 MF |
476 | |
477 | /* | |
478 | * For most modern platforms the preferred method of powering off is via | |
479 | * ACPI. However, there are some that are known to require the use of | |
480 | * EFI runtime services and for which ACPI does not work at all. | |
481 | * | |
482 | * Using EFI is a last resort, to be used only if no other option | |
483 | * exists. | |
484 | */ | |
485 | bool efi_reboot_required(void) | |
486 | { | |
487 | if (!acpi_gbl_reduced_hardware) | |
488 | return false; | |
489 | ||
490 | efi_reboot_quirk_mode = EFI_RESET_WARM; | |
491 | return true; | |
492 | } | |
493 | ||
494 | bool efi_poweroff_required(void) | |
495 | { | |
13737181 | 496 | return acpi_gbl_reduced_hardware || acpi_no_s5; |
44be28e9 | 497 | } |