2 Memory Detection for Virtual Machines.
4 Copyright (c) 2020, Rebecca Cran <rebecca@bsdio.com>
5 Copyright (c) 2006 - 2016, Intel Corporation. All rights reserved.<BR>
7 SPDX-License-Identifier: BSD-2-Clause-Patent
16 // The package level header files this module uses
18 #include <IndustryStandard/E820.h>
19 #include <IndustryStandard/Q35MchIch9.h>
23 // The Library classes this module consumes
25 #include <Library/BaseLib.h>
26 #include <Library/BaseMemoryLib.h>
27 #include <Library/DebugLib.h>
28 #include <Library/HobLib.h>
29 #include <Library/IoLib.h>
30 #include <Library/PcdLib.h>
31 #include <Library/PciLib.h>
32 #include <Library/PeimEntryPoint.h>
33 #include <Library/ResourcePublicationLib.h>
34 #include <Library/MtrrLib.h>
39 UINT8 mPhysMemAddressWidth
;
41 STATIC UINT32 mS3AcpiReservedMemoryBase
;
42 STATIC UINT32 mS3AcpiReservedMemorySize
;
44 STATIC UINT16 mQ35TsegMbytes
;
46 BOOLEAN mQ35SmramAtDefaultSmbase
= FALSE
;
49 Q35TsegMbytesInitialization (
53 UINT16 ExtendedTsegMbytes
;
54 RETURN_STATUS PcdStatus
;
56 if (mHostBridgeDevId
!= INTEL_Q35_MCH_DEVICE_ID
) {
59 "%a: no TSEG (SMRAM) on host bridge DID=0x%04x; "
60 "only DID=0x%04x (Q35) is supported\n",
63 INTEL_Q35_MCH_DEVICE_ID
70 // Check if QEMU offers an extended TSEG.
72 // This can be seen from writing MCH_EXT_TSEG_MB_QUERY to the MCH_EXT_TSEG_MB
73 // register, and reading back the register.
75 // On a QEMU machine type that does not offer an extended TSEG, the initial
76 // write overwrites whatever value a malicious guest OS may have placed in
77 // the (unimplemented) register, before entering S3 or rebooting.
78 // Subsequently, the read returns MCH_EXT_TSEG_MB_QUERY unchanged.
80 // On a QEMU machine type that offers an extended TSEG, the initial write
81 // triggers an update to the register. Subsequently, the value read back
82 // (which is guaranteed to differ from MCH_EXT_TSEG_MB_QUERY) tells us the
83 // number of megabytes.
85 PciWrite16 (DRAMC_REGISTER_Q35 (MCH_EXT_TSEG_MB
), MCH_EXT_TSEG_MB_QUERY
);
86 ExtendedTsegMbytes
= PciRead16 (DRAMC_REGISTER_Q35 (MCH_EXT_TSEG_MB
));
87 if (ExtendedTsegMbytes
== MCH_EXT_TSEG_MB_QUERY
) {
88 mQ35TsegMbytes
= PcdGet16 (PcdQ35TsegMbytes
);
94 "%a: QEMU offers an extended TSEG (%d MB)\n",
98 PcdStatus
= PcdSet16S (PcdQ35TsegMbytes
, ExtendedTsegMbytes
);
99 ASSERT_RETURN_ERROR (PcdStatus
);
100 mQ35TsegMbytes
= ExtendedTsegMbytes
;
105 GetSystemMemorySizeBelow4gb (
113 // CMOS 0x34/0x35 specifies the system memory above 16 MB.
114 // * CMOS(0x35) is the high byte
115 // * CMOS(0x34) is the low byte
116 // * The size is specified in 64KB chunks
117 // * Since this is memory above 16MB, the 16MB must be added
118 // into the calculation to get the total memory size.
121 Cmos0x34
= (UINT8
) CmosRead8 (0x34);
122 Cmos0x35
= (UINT8
) CmosRead8 (0x35);
124 return (UINT32
) (((UINTN
)((Cmos0x35
<< 8) + Cmos0x34
) << 16) + SIZE_16MB
);
130 GetSystemMemorySizeAbove4gb (
137 // CMOS 0x5b-0x5d specifies the system memory above 4GB.
138 // * CMOS(0x5d) is the most significant size byte
139 // * CMOS(0x5c) is the middle size byte
140 // * CMOS(0x5b) is the least significant size byte
141 // * The size is specified in 64KB chunks
145 for (CmosIndex
= 0x5d; CmosIndex
>= 0x5b; CmosIndex
--) {
146 Size
= (UINT32
) (Size
<< 8) + (UINT32
) CmosRead8 (CmosIndex
);
149 return LShiftU64 (Size
, 16);
154 Return the highest address that DXE could possibly use, plus one.
162 UINT64 FirstNonAddress
;
163 UINT64 Pci64Base
, Pci64Size
;
164 RETURN_STATUS PcdStatus
;
166 FirstNonAddress
= BASE_4GB
+ GetSystemMemorySizeAbove4gb ();
169 // If DXE is 32-bit, then we're done; PciBusDxe will degrade 64-bit MMIO
170 // resources to 32-bit anyway. See DegradeResource() in
171 // "PciResourceSupport.c".
174 if (!FeaturePcdGet (PcdDxeIplSwitchToLongMode
)) {
175 return FirstNonAddress
;
180 // Otherwise, in order to calculate the highest address plus one, we must
181 // consider the 64-bit PCI host aperture too. Fetch the default size.
183 Pci64Size
= PcdGet64 (PcdPciMmio64Size
);
185 if (Pci64Size
== 0) {
186 if (mBootMode
!= BOOT_ON_S3_RESUME
) {
187 DEBUG ((DEBUG_INFO
, "%a: disabling 64-bit PCI host aperture\n",
189 PcdStatus
= PcdSet64S (PcdPciMmio64Size
, 0);
190 ASSERT_RETURN_ERROR (PcdStatus
);
194 // There's nothing more to do; the amount of memory above 4GB fully
195 // determines the highest address plus one. The memory hotplug area (see
196 // below) plays no role for the firmware in this case.
198 return FirstNonAddress
;
202 // SeaBIOS aligns both boundaries of the 64-bit PCI host aperture to 1GB, so
203 // that the host can map it with 1GB hugepages. Follow suit.
205 Pci64Base
= ALIGN_VALUE (FirstNonAddress
, (UINT64
)SIZE_1GB
);
206 Pci64Size
= ALIGN_VALUE (Pci64Size
, (UINT64
)SIZE_1GB
);
209 // The 64-bit PCI host aperture should also be "naturally" aligned. The
210 // alignment is determined by rounding the size of the aperture down to the
211 // next smaller or equal power of two. That is, align the aperture by the
212 // largest BAR size that can fit into it.
214 Pci64Base
= ALIGN_VALUE (Pci64Base
, GetPowerOfTwo64 (Pci64Size
));
216 if (mBootMode
!= BOOT_ON_S3_RESUME
) {
218 // The core PciHostBridgeDxe driver will automatically add this range to
219 // the GCD memory space map through our PciHostBridgeLib instance; here we
220 // only need to set the PCDs.
222 PcdStatus
= PcdSet64S (PcdPciMmio64Base
, Pci64Base
);
223 ASSERT_RETURN_ERROR (PcdStatus
);
224 PcdStatus
= PcdSet64S (PcdPciMmio64Size
, Pci64Size
);
225 ASSERT_RETURN_ERROR (PcdStatus
);
227 DEBUG ((DEBUG_INFO
, "%a: Pci64Base=0x%Lx Pci64Size=0x%Lx\n",
228 __FUNCTION__
, Pci64Base
, Pci64Size
));
232 // The useful address space ends with the 64-bit PCI host aperture.
234 FirstNonAddress
= Pci64Base
+ Pci64Size
;
235 return FirstNonAddress
;
240 Initialize the mPhysMemAddressWidth variable, based on guest RAM size.
243 AddressWidthInitialization (
247 UINT64 FirstNonAddress
;
250 // As guest-physical memory size grows, the permanent PEI RAM requirements
251 // are dominated by the identity-mapping page tables built by the DXE IPL.
252 // The DXE IPL keys off of the physical address bits advertized in the CPU
253 // HOB. To conserve memory, we calculate the minimum address width here.
255 FirstNonAddress
= GetFirstNonAddress ();
256 mPhysMemAddressWidth
= (UINT8
)HighBitSet64 (FirstNonAddress
);
259 // If FirstNonAddress is not an integral power of two, then we need an additional bit.
262 if ((FirstNonAddress
& (FirstNonAddress
- 1)) != 0) {
263 ++mPhysMemAddressWidth
;
267 // The minimum address width is 36 (covers up to and excluding 64 GB, which
268 // is the maximum for Ia32 + PAE). The theoretical architecture maximum for
269 // X64 long mode is 52 bits, but the DXE IPL clamps that down to 48 bits. We
270 // can simply assert that here, since 48 bits are good enough for 256 TB.
272 if (mPhysMemAddressWidth
<= 36) {
273 mPhysMemAddressWidth
= 36;
275 ASSERT (mPhysMemAddressWidth
<= 48);
280 Calculate the cap for the permanent PEI memory.
288 BOOLEAN Page1GSupport
;
296 // If DXE is 32-bit, then just return the traditional 64 MB cap.
299 if (!FeaturePcdGet (PcdDxeIplSwitchToLongMode
)) {
305 // Dependent on physical address width, PEI memory allocations can be
306 // dominated by the page tables built for 64-bit DXE. So we key the cap off
307 // of those. The code below is based on CreateIdentityMappingPageTables() in
308 // "MdeModulePkg/Core/DxeIplPeim/X64/VirtualMemory.c".
310 Page1GSupport
= FALSE
;
311 if (PcdGetBool (PcdUse1GPageTable
)) {
312 AsmCpuid (0x80000000, &RegEax
, NULL
, NULL
, NULL
);
313 if (RegEax
>= 0x80000001) {
314 AsmCpuid (0x80000001, NULL
, NULL
, NULL
, &RegEdx
);
315 if ((RegEdx
& BIT26
) != 0) {
316 Page1GSupport
= TRUE
;
321 if (mPhysMemAddressWidth
<= 39) {
323 PdpEntries
= 1 << (mPhysMemAddressWidth
- 30);
324 ASSERT (PdpEntries
<= 0x200);
326 Pml4Entries
= 1 << (mPhysMemAddressWidth
- 39);
327 ASSERT (Pml4Entries
<= 0x200);
331 TotalPages
= Page1GSupport
? Pml4Entries
+ 1 :
332 (PdpEntries
+ 1) * Pml4Entries
+ 1;
333 ASSERT (TotalPages
<= 0x40201);
336 // Add 64 MB for miscellaneous allocations. Note that for
337 // mPhysMemAddressWidth values close to 36, the cap will actually be
338 // dominated by this increment.
340 return (UINT32
)(EFI_PAGES_TO_SIZE (TotalPages
) + SIZE_64MB
);
345 Publish PEI core memory
347 @return EFI_SUCCESS The PEIM initialized successfully.
356 EFI_PHYSICAL_ADDRESS MemoryBase
;
358 UINT32 LowerMemorySize
;
361 LowerMemorySize
= GetSystemMemorySizeBelow4gb ();
362 if (FeaturePcdGet (PcdSmmSmramRequire
)) {
364 // TSEG is chipped from the end of low RAM
366 LowerMemorySize
-= mQ35TsegMbytes
* SIZE_1MB
;
370 // If S3 is supported, then the S3 permanent PEI memory is placed next,
371 // downwards. Its size is primarily dictated by CpuMpPei. The formula below
372 // is an approximation.
375 mS3AcpiReservedMemorySize
= SIZE_512KB
+
377 PcdGet32 (PcdCpuApStackSize
);
378 mS3AcpiReservedMemoryBase
= LowerMemorySize
- mS3AcpiReservedMemorySize
;
379 LowerMemorySize
= mS3AcpiReservedMemoryBase
;
382 if (mBootMode
== BOOT_ON_S3_RESUME
) {
383 MemoryBase
= mS3AcpiReservedMemoryBase
;
384 MemorySize
= mS3AcpiReservedMemorySize
;
386 PeiMemoryCap
= GetPeiMemoryCap ();
387 DEBUG ((DEBUG_INFO
, "%a: mPhysMemAddressWidth=%d PeiMemoryCap=%u KB\n",
388 __FUNCTION__
, mPhysMemAddressWidth
, PeiMemoryCap
>> 10));
391 // Determine the range of memory to use during PEI
393 // Technically we could lay the permanent PEI RAM over SEC's temporary
394 // decompression and scratch buffer even if "secure S3" is needed, since
395 // their lifetimes don't overlap. However, PeiFvInitialization() will cover
396 // RAM up to PcdOvmfDecompressionScratchEnd with an EfiACPIMemoryNVS memory
397 // allocation HOB, and other allocations served from the permanent PEI RAM
398 // shouldn't overlap with that HOB.
400 MemoryBase
= mS3Supported
&& FeaturePcdGet (PcdSmmSmramRequire
) ?
401 PcdGet32 (PcdOvmfDecompressionScratchEnd
) :
402 PcdGet32 (PcdOvmfDxeMemFvBase
) + PcdGet32 (PcdOvmfDxeMemFvSize
);
403 MemorySize
= LowerMemorySize
- MemoryBase
;
404 if (MemorySize
> PeiMemoryCap
) {
405 MemoryBase
= LowerMemorySize
- PeiMemoryCap
;
406 MemorySize
= PeiMemoryCap
;
411 // Publish this memory to the PEI Core
413 Status
= PublishSystemMemory(MemoryBase
, MemorySize
);
414 ASSERT_EFI_ERROR (Status
);
421 Perform Memory Detection for QEMU / KVM
430 UINT64 LowerMemorySize
;
431 UINT64 UpperMemorySize
;
432 MTRR_SETTINGS MtrrSettings
;
435 DEBUG ((DEBUG_INFO
, "%a called\n", __FUNCTION__
));
438 // Determine total memory size available
440 LowerMemorySize
= GetSystemMemorySizeBelow4gb ();
441 UpperMemorySize
= GetSystemMemorySizeAbove4gb ();
443 if (mBootMode
== BOOT_ON_S3_RESUME
) {
445 // Create the following memory HOB as an exception on the S3 boot path.
447 // Normally we'd create memory HOBs only on the normal boot path. However,
448 // CpuMpPei specifically needs such a low-memory HOB on the S3 path as
449 // well, for "borrowing" a subset of it temporarily, for the AP startup
452 // CpuMpPei saves the original contents of the borrowed area in permanent
453 // PEI RAM, in a backup buffer allocated with the normal PEI services.
454 // CpuMpPei restores the original contents ("returns" the borrowed area) at
455 // End-of-PEI. End-of-PEI in turn is emitted by S3Resume2Pei before
456 // transferring control to the OS's wakeup vector in the FACS.
458 // We expect any other PEIMs that "borrow" memory similarly to CpuMpPei to
459 // restore the original contents. Furthermore, we expect all such PEIMs
460 // (CpuMpPei included) to claim the borrowed areas by producing memory
461 // allocation HOBs, and to honor preexistent memory allocation HOBs when
462 // looking for an area to borrow.
464 AddMemoryRangeHob (0, BASE_512KB
+ BASE_128KB
);
467 // Create memory HOBs
469 AddMemoryRangeHob (0, BASE_512KB
+ BASE_128KB
);
471 if (FeaturePcdGet (PcdSmmSmramRequire
)) {
474 TsegSize
= mQ35TsegMbytes
* SIZE_1MB
;
475 AddMemoryRangeHob (BASE_1MB
, LowerMemorySize
- TsegSize
);
476 AddReservedMemoryBaseSizeHob (LowerMemorySize
- TsegSize
, TsegSize
,
479 AddMemoryRangeHob (BASE_1MB
, LowerMemorySize
);
482 if (UpperMemorySize
!= 0) {
483 AddMemoryBaseSizeHob (BASE_4GB
, UpperMemorySize
);
488 // We'd like to keep the following ranges uncached:
490 // - [LowerMemorySize, 4 GB)
492 // Everything else should be WB. Unfortunately, programming the inverse (ie.
493 // keeping the default UC, and configuring the complement set of the above as
494 // WB) is not reliable in general, because the end of the upper RAM can have
495 // practically any alignment, and we may not have enough variable MTRRs to
498 if (IsMtrrSupported ()) {
499 MtrrGetAllMtrrs (&MtrrSettings
);
502 // MTRRs disabled, fixed MTRRs disabled, default type is uncached
504 ASSERT ((MtrrSettings
.MtrrDefType
& BIT11
) == 0);
505 ASSERT ((MtrrSettings
.MtrrDefType
& BIT10
) == 0);
506 ASSERT ((MtrrSettings
.MtrrDefType
& 0xFF) == 0);
509 // flip default type to writeback
511 SetMem (&MtrrSettings
.Fixed
, sizeof MtrrSettings
.Fixed
, 0x06);
512 ZeroMem (&MtrrSettings
.Variables
, sizeof MtrrSettings
.Variables
);
513 MtrrSettings
.MtrrDefType
|= BIT11
| BIT10
| 6;
514 MtrrSetAllMtrrs (&MtrrSettings
);
517 // Set memory range from 640KB to 1MB to uncacheable
519 Status
= MtrrSetMemoryAttribute (BASE_512KB
+ BASE_128KB
,
520 BASE_1MB
- (BASE_512KB
+ BASE_128KB
), CacheUncacheable
);
521 ASSERT_EFI_ERROR (Status
);
524 // Set memory range from the "top of lower RAM" (RAM below 4GB) to 4GB as uncacheable
527 Status
= MtrrSetMemoryAttribute (LowerMemorySize
,
528 SIZE_4GB
- LowerMemorySize
, CacheUncacheable
);
529 ASSERT_EFI_ERROR (Status
);
534 Publish system RAM and reserve memory regions
538 InitializeRamRegions (
542 QemuInitializeRam ();
544 if (mS3Supported
&& mBootMode
!= BOOT_ON_S3_RESUME
) {
546 // This is the memory range that will be used for PEI on S3 resume
548 BuildMemoryAllocationHob (
549 mS3AcpiReservedMemoryBase
,
550 mS3AcpiReservedMemorySize
,
555 // Cover the initial RAM area used as stack and temporary PEI heap.
557 // This is reserved as ACPI NVS so it can be used on S3 resume.
559 BuildMemoryAllocationHob (
560 PcdGet32 (PcdOvmfSecPeiTempRamBase
),
561 PcdGet32 (PcdOvmfSecPeiTempRamSize
),
566 // SEC stores its table of GUIDed section handlers here.
568 BuildMemoryAllocationHob (
569 PcdGet64 (PcdGuidedExtractHandlerTableAddress
),
570 PcdGet32 (PcdGuidedExtractHandlerTableSize
),
576 // Reserve the initial page tables built by the reset vector code.
578 // Since this memory range will be used by the Reset Vector on S3
579 // resume, it must be reserved as ACPI NVS.
581 BuildMemoryAllocationHob (
582 (EFI_PHYSICAL_ADDRESS
)(UINTN
) PcdGet32 (PcdOvmfSecPageTablesBase
),
583 (UINT64
)(UINTN
) PcdGet32 (PcdOvmfSecPageTablesSize
),
589 if (mBootMode
!= BOOT_ON_S3_RESUME
) {
590 if (!FeaturePcdGet (PcdSmmSmramRequire
)) {
592 // Reserve the lock box storage area
594 // Since this memory range will be used on S3 resume, it must be
595 // reserved as ACPI NVS.
597 // If S3 is unsupported, then various drivers might still write to the
598 // LockBox area. We ought to prevent DXE from serving allocation requests
599 // such that they would overlap the LockBox storage.
602 (VOID
*)(UINTN
) PcdGet32 (PcdOvmfLockBoxStorageBase
),
603 (UINTN
) PcdGet32 (PcdOvmfLockBoxStorageSize
)
605 BuildMemoryAllocationHob (
606 (EFI_PHYSICAL_ADDRESS
)(UINTN
) PcdGet32 (PcdOvmfLockBoxStorageBase
),
607 (UINT64
)(UINTN
) PcdGet32 (PcdOvmfLockBoxStorageSize
),
608 mS3Supported
? EfiACPIMemoryNVS
: EfiBootServicesData
612 if (FeaturePcdGet (PcdSmmSmramRequire
)) {
616 // Make sure the TSEG area that we reported as a reserved memory resource
617 // cannot be used for reserved memory allocations.
619 TsegSize
= mQ35TsegMbytes
* SIZE_1MB
;
620 BuildMemoryAllocationHob (
621 GetSystemMemorySizeBelow4gb() - TsegSize
,
623 EfiReservedMemoryType