git.proxmox.com Git - mirror_edk2.git/blobdiff - UefiCpuPkg/Library/RegisterCpuFeaturesLib/CpuFeaturesInitialize.c
UefiCpuPkg: Refactor initialization of CPU features during S3 resume
[mirror_edk2.git] / UefiCpuPkg / Library / RegisterCpuFeaturesLib / CpuFeaturesInitialize.c
index bc372a338f0b81f0bbc869afacb5e3cf0f1c964e..6e2ab7951821b76444b1597e015c9770e7d39053 100644 (file)
@@ -1,36 +1,30 @@
 /** @file\r
   CPU Features Initialize functions.\r
 \r
-  Copyright (c) 2017 - 2018, Intel Corporation. All rights reserved.<BR>\r
-  This program and the accompanying materials\r
-  are licensed and made available under the terms and conditions of the BSD License\r
-  which accompanies this distribution.  The full text of the license may be found at\r
-  http://opensource.org/licenses/bsd-license.php\r
-\r
-  THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,\r
-  WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.\r
+  Copyright (c) 2017 - 2021, Intel Corporation. All rights reserved.<BR>\r
+  SPDX-License-Identifier: BSD-2-Clause-Patent\r
 \r
 **/\r
 \r
 #include "RegisterCpuFeatures.h"\r
 \r
 CHAR16 *mDependTypeStr[]   = {L"None", L"Thread", L"Core", L"Package", L"Invalid" };\r
-CHAR16 *mRegisterTypeStr[] = {L"MSR", L"CR", L"MMIO", L"CACHE", L"SEMAP", L"INVALID" };\r
 \r
 /**\r
   Worker function to save PcdCpuFeaturesCapability.\r
 \r
   @param[in]  SupportedFeatureMask  The pointer to CPU feature bits mask buffer\r
+  @param[in]  BitMaskSize           CPU feature bits mask buffer size.\r
+\r
 **/\r
 VOID\r
 SetCapabilityPcd (\r
-  IN UINT8               *SupportedFeatureMask\r
+  IN UINT8               *SupportedFeatureMask,\r
+  IN UINTN               BitMaskSize\r
   )\r
 {\r
   EFI_STATUS             Status;\r
-  UINTN                  BitMaskSize;\r
 \r
-  BitMaskSize = PcdGetSize (PcdCpuFeaturesCapability);\r
   Status = PcdSetPtrS (PcdCpuFeaturesCapability, &BitMaskSize, SupportedFeatureMask);\r
   ASSERT_EFI_ERROR (Status);\r
 }\r
@@ -39,62 +33,20 @@ SetCapabilityPcd (
   Worker function to save PcdCpuFeaturesSetting.\r
 \r
   @param[in]  SupportedFeatureMask  The pointer to CPU feature bits mask buffer\r
+  @param[in]  BitMaskSize           CPU feature bits mask buffer size.\r
 **/\r
 VOID\r
 SetSettingPcd (\r
-  IN UINT8               *SupportedFeatureMask\r
+  IN UINT8               *SupportedFeatureMask,\r
+  IN UINTN               BitMaskSize\r
   )\r
 {\r
   EFI_STATUS             Status;\r
-  UINTN                  BitMaskSize;\r
 \r
-  BitMaskSize = PcdGetSize (PcdCpuFeaturesSetting);\r
   Status = PcdSetPtrS (PcdCpuFeaturesSetting, &BitMaskSize, SupportedFeatureMask);\r
   ASSERT_EFI_ERROR (Status);\r
 }\r
 \r
-/**\r
-  Worker function to get PcdCpuFeaturesSupport.\r
-\r
-  @return  The pointer to CPU feature bits mask buffer.\r
-**/\r
-UINT8 *\r
-GetSupportPcd (\r
-  VOID\r
-  )\r
-{\r
-  UINT8                  *SupportBitMask;\r
-\r
-  SupportBitMask = AllocateCopyPool (\r
-          PcdGetSize (PcdCpuFeaturesSupport),\r
-          PcdGetPtr (PcdCpuFeaturesSupport)\r
-          );\r
-  ASSERT (SupportBitMask != NULL);\r
-\r
-  return SupportBitMask;\r
-}\r
-\r
-/**\r
-  Worker function to get PcdCpuFeaturesUserConfiguration.\r
-\r
-  @return  The pointer to CPU feature bits mask buffer.\r
-**/\r
-UINT8 *\r
-GetConfigurationPcd (\r
-  VOID\r
-  )\r
-{\r
-  UINT8                  *SupportBitMask;\r
-\r
-  SupportBitMask = AllocateCopyPool (\r
-          PcdGetSize (PcdCpuFeaturesUserConfiguration),\r
-          PcdGetPtr (PcdCpuFeaturesUserConfiguration)\r
-          );\r
-  ASSERT (SupportBitMask != NULL);\r
-\r
-  return SupportBitMask;\r
-}\r
-\r
 /**\r
   Collects CPU type and feature information.\r
 \r
@@ -134,11 +86,10 @@ FillProcessorInfo (
 /**\r
   Prepares for private data used for CPU features.\r
 \r
-  @param[in]  NumberOfCpus  Number of processor in system\r
 **/\r
 VOID\r
 CpuInitDataInitialize (\r
-  IN UINTN                             NumberOfCpus\r
+  VOID\r
   )\r
 {\r
   EFI_STATUS                           Status;\r
@@ -152,19 +103,35 @@ CpuInitDataInitialize (
   UINT32                               Package;\r
   UINT32                               Thread;\r
   EFI_CPU_PHYSICAL_LOCATION            *Location;\r
-  BOOLEAN                              *CoresVisited;\r
-  UINTN                                Index;\r
+  UINT32                               PackageIndex;\r
+  UINT32                               CoreIndex;\r
+  UINTN                                Pages;\r
+  UINT32                               FirstPackage;\r
+  UINT32                               *FirstCore;\r
+  UINT32                               *FirstThread;\r
   ACPI_CPU_DATA                        *AcpiCpuData;\r
   CPU_STATUS_INFORMATION               *CpuStatus;\r
-  UINT32                               *ValidCoreCountPerPackage;\r
+  UINT32                               *ThreadCountPerPackage;\r
+  UINT8                                *ThreadCountPerCore;\r
+  UINTN                                NumberOfCpus;\r
+  UINTN                                NumberOfEnabledProcessors;\r
 \r
   Core    = 0;\r
   Package = 0;\r
   Thread  = 0;\r
 \r
   CpuFeaturesData = GetCpuFeaturesData ();\r
-  CpuFeaturesData->InitOrder = AllocateZeroPool (sizeof (CPU_FEATURES_INIT_ORDER) * NumberOfCpus);\r
+\r
+  //\r
+  // Initialize CpuFeaturesData->MpService as early as possible, so later functions can use it.\r
+  //\r
+  CpuFeaturesData->MpService = GetMpService ();\r
+\r
+  GetNumberOfProcessor (&NumberOfCpus, &NumberOfEnabledProcessors);\r
+\r
+  CpuFeaturesData->InitOrder = AllocatePages (EFI_SIZE_TO_PAGES (sizeof (CPU_FEATURES_INIT_ORDER) * NumberOfCpus));\r
   ASSERT (CpuFeaturesData->InitOrder != NULL);\r
+  ZeroMem (CpuFeaturesData->InitOrder, sizeof (CPU_FEATURES_INIT_ORDER) * NumberOfCpus);\r
 \r
   //\r
   // Collect CPU Features information\r
@@ -185,10 +152,10 @@ CpuInitDataInitialize (
   ASSERT (AcpiCpuData != NULL);\r
   CpuFeaturesData->AcpiCpuData= AcpiCpuData;\r
 \r
-  CpuStatus = &AcpiCpuData->CpuStatus;\r
+  CpuStatus = &AcpiCpuData->CpuFeatureInitData.CpuStatus;\r
   Location = AllocateZeroPool (sizeof (EFI_CPU_PHYSICAL_LOCATION) * NumberOfCpus);\r
   ASSERT (Location != NULL);\r
-  AcpiCpuData->ApLocation = (EFI_PHYSICAL_ADDRESS)(UINTN)Location;\r
+  AcpiCpuData->CpuFeatureInitData.ApLocation = (EFI_PHYSICAL_ADDRESS)(UINTN)Location;\r
 \r
   for (ProcessorNumber = 0; ProcessorNumber < NumberOfCpus; ProcessorNumber++) {\r
     InitOrder = &CpuFeaturesData->InitOrder[ProcessorNumber];\r
@@ -238,45 +205,104 @@ CpuInitDataInitialize (
   //\r
   // Collect valid core count in each package because not all cores are valid.\r
   //\r
-  ValidCoreCountPerPackage= AllocateZeroPool (sizeof (UINT32) * CpuStatus->PackageCount);\r
-  ASSERT (ValidCoreCountPerPackage != 0);\r
-  CpuStatus->ValidCoreCountPerPackage = (EFI_PHYSICAL_ADDRESS)(UINTN)ValidCoreCountPerPackage;\r
-  CoresVisited = AllocatePool (sizeof (BOOLEAN) * CpuStatus->MaxCoreCount);\r
-  ASSERT (CoresVisited != NULL);\r
-\r
-  for (Index = 0; Index < CpuStatus->PackageCount; Index ++ ) {\r
-    ZeroMem (CoresVisited, sizeof (BOOLEAN) * CpuStatus->MaxCoreCount);\r
-    //\r
-    // Collect valid cores in Current package.\r
-    //\r
-    for (ProcessorNumber = 0; ProcessorNumber < NumberOfCpus; ProcessorNumber++) {\r
-      Location = &CpuFeaturesData->InitOrder[ProcessorNumber].CpuInfo.ProcessorInfo.Location;\r
-      if (Location->Package == Index && !CoresVisited[Location->Core] ) {\r
-        //\r
-        // The ValidCores position for Location->Core is valid.\r
-        // The possible values in ValidCores[Index] are 0 or 1.\r
-        // FALSE means no valid threads in this Core.\r
-        // TRUE means have valid threads in this core, no matter the thead count is 1 or more.\r
-        //\r
-        CoresVisited[Location->Core] = TRUE;\r
-        ValidCoreCountPerPackage[Index]++;\r
+  ThreadCountPerPackage = AllocateZeroPool (sizeof (UINT32) * CpuStatus->PackageCount);\r
+  ASSERT (ThreadCountPerPackage != NULL);\r
+  CpuStatus->ThreadCountPerPackage = (EFI_PHYSICAL_ADDRESS)(UINTN)ThreadCountPerPackage;\r
+\r
+  ThreadCountPerCore = AllocateZeroPool (sizeof (UINT8) * CpuStatus->PackageCount * CpuStatus->MaxCoreCount);\r
+  ASSERT (ThreadCountPerCore != NULL);\r
+  CpuStatus->ThreadCountPerCore = (EFI_PHYSICAL_ADDRESS)(UINTN)ThreadCountPerCore;\r
+\r
+  for (ProcessorNumber = 0; ProcessorNumber < NumberOfCpus; ProcessorNumber++) {\r
+    Location = &CpuFeaturesData->InitOrder[ProcessorNumber].CpuInfo.ProcessorInfo.Location;\r
+    ThreadCountPerPackage[Location->Package]++;\r
+    ThreadCountPerCore[Location->Package * CpuStatus->MaxCoreCount + Location->Core]++;\r
+  }\r
+\r
+  for (PackageIndex = 0; PackageIndex < CpuStatus->PackageCount; PackageIndex++) {\r
+    if (ThreadCountPerPackage[PackageIndex] != 0) {\r
+      DEBUG ((DEBUG_INFO, "P%02d: Thread Count = %d\n", PackageIndex, ThreadCountPerPackage[PackageIndex]));\r
+      for (CoreIndex = 0; CoreIndex < CpuStatus->MaxCoreCount; CoreIndex++) {\r
+        if (ThreadCountPerCore[PackageIndex * CpuStatus->MaxCoreCount + CoreIndex] != 0) {\r
+          DEBUG ((\r
+            DEBUG_INFO, "  P%02d C%04d, Thread Count = %d\n", PackageIndex, CoreIndex,\r
+            ThreadCountPerCore[PackageIndex * CpuStatus->MaxCoreCount + CoreIndex]\r
+            ));\r
+        }\r
       }\r
     }\r
   }\r
-  FreePool (CoresVisited);\r
 \r
-  for (Index = 0; Index <= Package; Index++) {\r
-    DEBUG ((DEBUG_INFO, "Package: %d, Valid Core : %d\n", Index, ValidCoreCountPerPackage[Index]));\r
-  }\r
+  CpuFeaturesData->CpuFlags.CoreSemaphoreCount = AllocateZeroPool (sizeof (UINT32) * CpuStatus->PackageCount * CpuStatus->MaxCoreCount * CpuStatus->MaxThreadCount);\r
+  ASSERT (CpuFeaturesData->CpuFlags.CoreSemaphoreCount != NULL);\r
+  CpuFeaturesData->CpuFlags.PackageSemaphoreCount = AllocateZeroPool (sizeof (UINT32) * CpuStatus->PackageCount * CpuStatus->MaxCoreCount * CpuStatus->MaxThreadCount);\r
+  ASSERT (CpuFeaturesData->CpuFlags.PackageSemaphoreCount != NULL);\r
+\r
+  //\r
+  // Initialize CpuFeaturesData->InitOrder[].CpuInfo.First\r
+  // Use AllocatePages () instead of AllocatePool () because pool cannot be freed in PEI phase but page can.\r
+  //\r
+  Pages     = EFI_SIZE_TO_PAGES (CpuStatus->PackageCount * sizeof (UINT32) + CpuStatus->PackageCount * CpuStatus->MaxCoreCount * sizeof (UINT32));\r
+  FirstCore = AllocatePages (Pages);\r
+  ASSERT (FirstCore != NULL);\r
+  FirstThread  = FirstCore + CpuStatus->PackageCount;\r
+\r
+  //\r
+  // Set FirstPackage, FirstCore[], FirstThread[] to maximum package ID, core ID, thread ID.\r
+  //\r
+  FirstPackage = MAX_UINT32;\r
+  SetMem32 (FirstCore,   CpuStatus->PackageCount * sizeof (UINT32), MAX_UINT32);\r
+  SetMem32 (FirstThread, CpuStatus->PackageCount * CpuStatus->MaxCoreCount * sizeof (UINT32), MAX_UINT32);\r
+\r
+  for (ProcessorNumber = 0; ProcessorNumber < NumberOfCpus; ProcessorNumber++) {\r
+    Location = &CpuFeaturesData->InitOrder[ProcessorNumber].CpuInfo.ProcessorInfo.Location;\r
+\r
+    //\r
+    // Save the minimum package ID in the platform.\r
+    //\r
+    FirstPackage                 = MIN (Location->Package, FirstPackage);\r
 \r
-  CpuFeaturesData->CpuFlags.SemaphoreCount = AllocateZeroPool (sizeof (UINT32) * CpuStatus->PackageCount * CpuStatus->MaxCoreCount * CpuStatus->MaxThreadCount);\r
-  ASSERT (CpuFeaturesData->CpuFlags.SemaphoreCount != NULL);\r
+    //\r
+    // Save the minimum core ID per package.\r
+    //\r
+    FirstCore[Location->Package] = MIN (Location->Core, FirstCore[Location->Package]);\r
+\r
+    //\r
+    // Save the minimum thread ID per core.\r
+    //\r
+    FirstThread[Location->Package * CpuStatus->MaxCoreCount + Location->Core] = MIN (\r
+      Location->Thread,\r
+      FirstThread[Location->Package * CpuStatus->MaxCoreCount + Location->Core]\r
+    );\r
+  }\r
 \r
   //\r
-  // Get support and configuration PCDs\r
+  // Update the First field.\r
   //\r
-  CpuFeaturesData->SupportPcd       = GetSupportPcd ();\r
-  CpuFeaturesData->ConfigurationPcd = GetConfigurationPcd ();\r
+  for (ProcessorNumber = 0; ProcessorNumber < NumberOfCpus; ProcessorNumber++) {\r
+    Location = &CpuFeaturesData->InitOrder[ProcessorNumber].CpuInfo.ProcessorInfo.Location;\r
+\r
+    if (Location->Package == FirstPackage) {\r
+      CpuFeaturesData->InitOrder[ProcessorNumber].CpuInfo.First.Package = 1;\r
+    }\r
+\r
+    //\r
+    // Set First.Die/Tile/Module for each thread assuming:\r
+    //  single Die under each package, single Tile under each Die, single Module under each Tile\r
+    //\r
+    CpuFeaturesData->InitOrder[ProcessorNumber].CpuInfo.First.Die = 1;\r
+    CpuFeaturesData->InitOrder[ProcessorNumber].CpuInfo.First.Tile = 1;\r
+    CpuFeaturesData->InitOrder[ProcessorNumber].CpuInfo.First.Module = 1;\r
+\r
+    if (Location->Core == FirstCore[Location->Package]) {\r
+      CpuFeaturesData->InitOrder[ProcessorNumber].CpuInfo.First.Core = 1;\r
+    }\r
+    if (Location->Thread == FirstThread[Location->Package * CpuStatus->MaxCoreCount + Location->Core]) {\r
+      CpuFeaturesData->InitOrder[ProcessorNumber].CpuInfo.First.Thread = 1;\r
+    }\r
+  }\r
+\r
+  FreePages (FirstCore, Pages);\r
 }\r
 \r
 /**\r
@@ -284,19 +310,20 @@ CpuInitDataInitialize (
 \r
   @param[in]  SupportedFeatureMask  The pointer to CPU feature bits mask buffer\r
   @param[in]  OrFeatureBitMask      The feature bit mask to do OR operation\r
+  @param[in]  BitMaskSize           The CPU feature bits mask buffer size.\r
+\r
 **/\r
 VOID\r
 SupportedMaskOr (\r
   IN UINT8               *SupportedFeatureMask,\r
-  IN UINT8               *OrFeatureBitMask\r
+  IN UINT8               *OrFeatureBitMask,\r
+  IN UINT32              BitMaskSize\r
   )\r
 {\r
   UINTN                  Index;\r
-  UINTN                  BitMaskSize;\r
   UINT8                  *Data1;\r
   UINT8                  *Data2;\r
 \r
-  BitMaskSize = PcdGetSize (PcdCpuFeaturesSupport);\r
   Data1 = SupportedFeatureMask;\r
   Data2 = OrFeatureBitMask;\r
   for (Index = 0; Index < BitMaskSize; Index++) {\r
@@ -309,19 +336,20 @@ SupportedMaskOr (
 \r
   @param[in]  SupportedFeatureMask  The pointer to CPU feature bits mask buffer\r
   @param[in]  AndFeatureBitMask     The feature bit mask to do AND operation\r
+  @param[in]  BitMaskSize           CPU feature bits mask buffer size.\r
+\r
 **/\r
 VOID\r
 SupportedMaskAnd (\r
-  IN UINT8               *SupportedFeatureMask,\r
-  IN UINT8               *AndFeatureBitMask\r
+  IN       UINT8               *SupportedFeatureMask,\r
+  IN CONST UINT8               *AndFeatureBitMask,\r
+  IN       UINT32              BitMaskSize\r
   )\r
 {\r
   UINTN                  Index;\r
-  UINTN                  BitMaskSize;\r
   UINT8                  *Data1;\r
-  UINT8                  *Data2;\r
+  CONST UINT8            *Data2;\r
 \r
-  BitMaskSize = PcdGetSize (PcdCpuFeaturesSupport);\r
   Data1 = SupportedFeatureMask;\r
   Data2 = AndFeatureBitMask;\r
   for (Index = 0; Index < BitMaskSize; Index++) {\r
@@ -334,19 +362,19 @@ SupportedMaskAnd (
 \r
   @param[in]  SupportedFeatureMask  The pointer to CPU feature bits mask buffer\r
   @param[in]  AndFeatureBitMask     The feature bit mask to do XOR operation\r
+  @param[in]  BitMaskSize           CPU feature bits mask buffer size.\r
 **/\r
 VOID\r
 SupportedMaskCleanBit (\r
   IN UINT8               *SupportedFeatureMask,\r
-  IN UINT8               *AndFeatureBitMask\r
+  IN UINT8               *AndFeatureBitMask,\r
+  IN UINT32              BitMaskSize\r
   )\r
 {\r
   UINTN                  Index;\r
-  UINTN                  BitMaskSize;\r
   UINT8                  *Data1;\r
   UINT8                  *Data2;\r
 \r
-  BitMaskSize = PcdGetSize (PcdCpuFeaturesSupport);\r
   Data1 = SupportedFeatureMask;\r
   Data2 = AndFeatureBitMask;\r
   for (Index = 0; Index < BitMaskSize; Index++) {\r
@@ -360,6 +388,7 @@ SupportedMaskCleanBit (
 \r
   @param[in]  SupportedFeatureMask   The pointer to CPU feature bits mask buffer\r
   @param[in]  ComparedFeatureBitMask The feature bit mask to be compared\r
+  @param[in]  BitMaskSize            CPU feature bits mask buffer size.\r
 \r
   @retval TRUE   The ComparedFeatureBitMask is set in CPU feature supported bits\r
                  mask buffer.\r
@@ -369,16 +398,14 @@ SupportedMaskCleanBit (
 BOOLEAN\r
 IsBitMaskMatch (\r
   IN UINT8               *SupportedFeatureMask,\r
-  IN UINT8               *ComparedFeatureBitMask\r
+  IN UINT8               *ComparedFeatureBitMask,\r
+  IN UINT32              BitMaskSize\r
   )\r
 {\r
   UINTN                  Index;\r
-  UINTN                  BitMaskSize;\r
   UINT8                  *Data1;\r
   UINT8                  *Data2;\r
 \r
-  BitMaskSize = PcdGetSize (PcdCpuFeaturesSupport);\r
-\r
   Data1 = SupportedFeatureMask;\r
   Data2 = ComparedFeatureBitMask;\r
   for (Index = 0; Index < BitMaskSize; Index++) {\r
@@ -407,7 +434,7 @@ CollectProcessorData (
   CPU_FEATURES_DATA                    *CpuFeaturesData;\r
 \r
   CpuFeaturesData = (CPU_FEATURES_DATA *)Buffer;\r
-  ProcessorNumber = GetProcessorIndex ();\r
+  ProcessorNumber = GetProcessorIndex (CpuFeaturesData);\r
   CpuInfo = &CpuFeaturesData->InitOrder[ProcessorNumber].CpuInfo;\r
   //\r
   // collect processor information\r
@@ -416,21 +443,21 @@ CollectProcessorData (
   Entry = GetFirstNode (&CpuFeaturesData->FeatureList);\r
   while (!IsNull (&CpuFeaturesData->FeatureList, Entry)) {\r
     CpuFeature = CPU_FEATURE_ENTRY_FROM_LINK (Entry);\r
-    if (IsBitMaskMatch (CpuFeaturesData->SupportPcd, CpuFeature->FeatureMask)) {\r
-      if (CpuFeature->SupportFunc == NULL) {\r
-        //\r
-        // If SupportFunc is NULL, then the feature is supported.\r
-        //\r
-        SupportedMaskOr (\r
-          CpuFeaturesData->InitOrder[ProcessorNumber].FeaturesSupportedMask,\r
-          CpuFeature->FeatureMask\r
-          );\r
-      } else if (CpuFeature->SupportFunc (ProcessorNumber, CpuInfo, CpuFeature->ConfigData)) {\r
-        SupportedMaskOr (\r
-          CpuFeaturesData->InitOrder[ProcessorNumber].FeaturesSupportedMask,\r
-          CpuFeature->FeatureMask\r
-          );\r
-      }\r
+    if (CpuFeature->SupportFunc == NULL) {\r
+      //\r
+      // If SupportFunc is NULL, then the feature is supported.\r
+      //\r
+      SupportedMaskOr (\r
+        CpuFeaturesData->InitOrder[ProcessorNumber].FeaturesSupportedMask,\r
+        CpuFeature->FeatureMask,\r
+        CpuFeaturesData->BitMaskSize\r
+        );\r
+    } else if (CpuFeature->SupportFunc (ProcessorNumber, CpuInfo, CpuFeature->ConfigData)) {\r
+      SupportedMaskOr (\r
+        CpuFeaturesData->InitOrder[ProcessorNumber].FeaturesSupportedMask,\r
+        CpuFeature->FeatureMask,\r
+        CpuFeaturesData->BitMaskSize\r
+        );\r
     }\r
     Entry = Entry->ForwardLink;\r
   }\r
@@ -471,8 +498,9 @@ DumpRegisterTableOnProcessor (
     case Msr:\r
       DEBUG ((\r
         DebugPrintErrorLevel,\r
-        "Processor: %d:   MSR: %x, Bit Start: %d, Bit Length: %d, Value: %lx\r\n",\r
-        ProcessorNumber,\r
+        "Processor: %04d: Index %04d, MSR  : %08x, Bit Start: %02d, Bit Length: %02d, Value: %016lx\r\n",\r
+        (UINT32) ProcessorNumber,\r
+        (UINT32) FeatureIndex,\r
         RegisterTableEntry->Index,\r
         RegisterTableEntry->ValidBitStart,\r
         RegisterTableEntry->ValidBitLength,\r
@@ -482,8 +510,9 @@ DumpRegisterTableOnProcessor (
     case ControlRegister:\r
       DEBUG ((\r
         DebugPrintErrorLevel,\r
-        "Processor: %d:    CR: %x, Bit Start: %d, Bit Length: %d, Value: %lx\r\n",\r
-        ProcessorNumber,\r
+        "Processor: %04d: Index %04d, CR   : %08x, Bit Start: %02d, Bit Length: %02d, Value: %016lx\r\n",\r
+        (UINT32) ProcessorNumber,\r
+        (UINT32) FeatureIndex,\r
         RegisterTableEntry->Index,\r
         RegisterTableEntry->ValidBitStart,\r
         RegisterTableEntry->ValidBitLength,\r
@@ -493,8 +522,9 @@ DumpRegisterTableOnProcessor (
     case MemoryMapped:\r
       DEBUG ((\r
         DebugPrintErrorLevel,\r
-        "Processor: %d:  MMIO: %lx, Bit Start: %d, Bit Length: %d, Value: %lx\r\n",\r
-        ProcessorNumber,\r
+        "Processor: %04d: Index %04d, MMIO : %016lx, Bit Start: %02d, Bit Length: %02d, Value: %016lx\r\n",\r
+        (UINT32) ProcessorNumber,\r
+        (UINT32) FeatureIndex,\r
         RegisterTableEntry->Index | LShiftU64 (RegisterTableEntry->HighIndex, 32),\r
         RegisterTableEntry->ValidBitStart,\r
         RegisterTableEntry->ValidBitLength,\r
@@ -504,8 +534,9 @@ DumpRegisterTableOnProcessor (
     case CacheControl:\r
       DEBUG ((\r
         DebugPrintErrorLevel,\r
-        "Processor: %d: CACHE: %x, Bit Start: %d, Bit Length: %d, Value: %lx\r\n",\r
-        ProcessorNumber,\r
+        "Processor: %04d: Index %04d, CACHE: %08x, Bit Start: %02d, Bit Length: %02d, Value: %016lx\r\n",\r
+        (UINT32) ProcessorNumber,\r
+        (UINT32) FeatureIndex,\r
         RegisterTableEntry->Index,\r
         RegisterTableEntry->ValidBitStart,\r
         RegisterTableEntry->ValidBitLength,\r
@@ -515,9 +546,10 @@ DumpRegisterTableOnProcessor (
     case Semaphore:\r
       DEBUG ((\r
         DebugPrintErrorLevel,\r
-        "Processor: %d: Semaphore: Scope Value: %s\r\n",\r
-        ProcessorNumber,\r
-        mDependTypeStr[MIN (RegisterTableEntry->Value, InvalidDepType)]\r
+        "Processor: %04d: Index %04d, SEMAP: %s\r\n",\r
+        (UINT32) ProcessorNumber,\r
+        (UINT32) FeatureIndex,\r
+        mDependTypeStr[MIN ((UINT32)RegisterTableEntry->Value, InvalidDepType)]\r
         ));\r
       break;\r
 \r
@@ -527,6 +559,32 @@ DumpRegisterTableOnProcessor (
   }\r
 }\r
 \r
+/**\r
+  Get the biggest dependence type.\r
+  PackageDepType > CoreDepType > ThreadDepType > NoneDepType.\r
+\r
+  @param[in]  BeforeDep           Before dependence type.\r
+  @param[in]  AfterDep            After dependence type.\r
+  @param[in]  NoneNeibBeforeDep   Before dependence type for non-neighboring features.\r
+  @param[in]  NoneNeibAfterDep    After dependence type for non-neighboring features.\r
+\r
+  @return  The biggest dependence type.\r
+**/\r
+CPU_FEATURE_DEPENDENCE_TYPE\r
+BiggestDep (\r
+  IN CPU_FEATURE_DEPENDENCE_TYPE  BeforeDep,\r
+  IN CPU_FEATURE_DEPENDENCE_TYPE  AfterDep,\r
+  IN CPU_FEATURE_DEPENDENCE_TYPE  NoneNeibBeforeDep,\r
+  IN CPU_FEATURE_DEPENDENCE_TYPE  NoneNeibAfterDep\r
+  )\r
+{\r
+  CPU_FEATURE_DEPENDENCE_TYPE Bigger;\r
+\r
+  Bigger = MAX (BeforeDep, AfterDep);\r
+  Bigger = MAX (Bigger, NoneNeibBeforeDep);\r
+  return MAX(Bigger, NoneNeibAfterDep);\r
+}\r
+\r
 /**\r
   Analysis register CPU features on each processor and save CPU setting in CPU register table.\r
 \r
@@ -551,6 +609,8 @@ AnalysisProcessorFeatures (
   BOOLEAN                              Success;\r
   CPU_FEATURE_DEPENDENCE_TYPE          BeforeDep;\r
   CPU_FEATURE_DEPENDENCE_TYPE          AfterDep;\r
+  CPU_FEATURE_DEPENDENCE_TYPE          NoneNeibBeforeDep;\r
+  CPU_FEATURE_DEPENDENCE_TYPE          NoneNeibAfterDep;\r
 \r
   CpuFeaturesData = GetCpuFeaturesData ();\r
   CpuFeaturesData->CapabilityPcd = AllocatePool (CpuFeaturesData->BitMaskSize);\r
@@ -561,21 +621,14 @@ AnalysisProcessorFeatures (
     //\r
     // Calculate the last capability on all processors\r
     //\r
-    SupportedMaskAnd (CpuFeaturesData->CapabilityPcd, CpuInitOrder->FeaturesSupportedMask);\r
+    SupportedMaskAnd (CpuFeaturesData->CapabilityPcd, CpuInitOrder->FeaturesSupportedMask, CpuFeaturesData->BitMaskSize);\r
   }\r
   //\r
   // Calculate the last setting\r
   //\r
-\r
   CpuFeaturesData->SettingPcd = AllocateCopyPool (CpuFeaturesData->BitMaskSize, CpuFeaturesData->CapabilityPcd);\r
   ASSERT (CpuFeaturesData->SettingPcd != NULL);\r
-  SupportedMaskAnd (CpuFeaturesData->SettingPcd, CpuFeaturesData->ConfigurationPcd);\r
-\r
-  //\r
-  // Save PCDs and display CPU PCDs\r
-  //\r
-  SetCapabilityPcd (CpuFeaturesData->CapabilityPcd);\r
-  SetSettingPcd (CpuFeaturesData->SettingPcd);\r
+  SupportedMaskAnd (CpuFeaturesData->SettingPcd, PcdGetPtr (PcdCpuFeaturesSetting), CpuFeaturesData->BitMaskSize);\r
 \r
   //\r
   // Dump the last CPU feature list\r
@@ -585,8 +638,8 @@ AnalysisProcessorFeatures (
     Entry = GetFirstNode (&CpuFeaturesData->FeatureList);\r
     while (!IsNull (&CpuFeaturesData->FeatureList, Entry)) {\r
       CpuFeature = CPU_FEATURE_ENTRY_FROM_LINK (Entry);\r
-      if (IsBitMaskMatch (CpuFeature->FeatureMask, CpuFeaturesData->CapabilityPcd)) {\r
-        if (IsBitMaskMatch (CpuFeature->FeatureMask, CpuFeaturesData->SettingPcd)) {\r
+      if (IsBitMaskMatch (CpuFeature->FeatureMask, CpuFeaturesData->CapabilityPcd, CpuFeaturesData->BitMaskSize)) {\r
+        if (IsBitMaskMatch (CpuFeature->FeatureMask, CpuFeaturesData->SettingPcd, CpuFeaturesData->BitMaskSize)) {\r
           DEBUG ((DEBUG_INFO, "[Enable   ] "));\r
         } else {\r
           DEBUG ((DEBUG_INFO, "[Disable  ] "));\r
@@ -594,19 +647,23 @@ AnalysisProcessorFeatures (
       } else {\r
         DEBUG ((DEBUG_INFO, "[Unsupport] "));\r
       }\r
-      DumpCpuFeature (CpuFeature);\r
+      DumpCpuFeature (CpuFeature, CpuFeaturesData->BitMaskSize);\r
       Entry = Entry->ForwardLink;\r
     }\r
-    DEBUG ((DEBUG_INFO, "PcdCpuFeaturesSupport:\n"));\r
-    DumpCpuFeatureMask (CpuFeaturesData->SupportPcd);\r
-    DEBUG ((DEBUG_INFO, "PcdCpuFeaturesUserConfiguration:\n"));\r
-    DumpCpuFeatureMask (CpuFeaturesData->ConfigurationPcd);\r
     DEBUG ((DEBUG_INFO, "PcdCpuFeaturesCapability:\n"));\r
-    DumpCpuFeatureMask (CpuFeaturesData->CapabilityPcd);\r
-    DEBUG ((DEBUG_INFO, "PcdCpuFeaturesSetting:\n"));\r
-    DumpCpuFeatureMask (CpuFeaturesData->SettingPcd);\r
+    DumpCpuFeatureMask (CpuFeaturesData->CapabilityPcd, CpuFeaturesData->BitMaskSize);\r
+    DEBUG ((DEBUG_INFO, "Origin PcdCpuFeaturesSetting:\n"));\r
+    DumpCpuFeatureMask (PcdGetPtr (PcdCpuFeaturesSetting), CpuFeaturesData->BitMaskSize);\r
+    DEBUG ((DEBUG_INFO, "Final PcdCpuFeaturesSetting:\n"));\r
+    DumpCpuFeatureMask (CpuFeaturesData->SettingPcd, CpuFeaturesData->BitMaskSize);\r
   );\r
 \r
+  //\r
+  // Save the capability and setting PCDs\r
+  //\r
+  SetCapabilityPcd (CpuFeaturesData->CapabilityPcd, CpuFeaturesData->BitMaskSize);\r
+  SetSettingPcd (CpuFeaturesData->SettingPcd, CpuFeaturesData->BitMaskSize);\r
+\r
   for (ProcessorNumber = 0; ProcessorNumber < NumberOfCpus; ProcessorNumber++) {\r
     CpuInitOrder = &CpuFeaturesData->InitOrder[ProcessorNumber];\r
     Entry = GetFirstNode (&CpuFeaturesData->FeatureList);\r
@@ -615,7 +672,7 @@ AnalysisProcessorFeatures (
       // Insert each feature into processor's order list\r
       //\r
       CpuFeature = CPU_FEATURE_ENTRY_FROM_LINK (Entry);\r
-      if (IsBitMaskMatch (CpuFeature->FeatureMask, CpuFeaturesData->CapabilityPcd)) {\r
+      if (IsBitMaskMatch (CpuFeature->FeatureMask, CpuFeaturesData->CapabilityPcd, CpuFeaturesData->BitMaskSize)) {\r
         CpuFeatureInOrder = AllocateCopyPool (sizeof (CPU_FEATURES_ENTRY), CpuFeature);\r
         ASSERT (CpuFeatureInOrder != NULL);\r
         InsertTailList (&CpuInitOrder->OrderList, &CpuFeatureInOrder->Link);\r
@@ -627,27 +684,22 @@ AnalysisProcessorFeatures (
     //\r
     CpuInfo = &CpuFeaturesData->InitOrder[ProcessorNumber].CpuInfo;\r
     Entry = GetFirstNode (&CpuInitOrder->OrderList);\r
-    NextEntry = Entry->ForwardLink;\r
     while (!IsNull (&CpuInitOrder->OrderList, Entry)) {\r
       CpuFeatureInOrder = CPU_FEATURE_ENTRY_FROM_LINK (Entry);\r
-      if (!IsNull (&CpuInitOrder->OrderList, NextEntry)) {\r
-        NextCpuFeatureInOrder = CPU_FEATURE_ENTRY_FROM_LINK (NextEntry);\r
-      } else {\r
-        NextCpuFeatureInOrder = NULL;\r
-      }\r
+\r
       Success = FALSE;\r
-      if (IsBitMaskMatch (CpuFeatureInOrder->FeatureMask, CpuFeaturesData->SettingPcd)) {\r
+      if (IsBitMaskMatch (CpuFeatureInOrder->FeatureMask, CpuFeaturesData->SettingPcd, CpuFeaturesData->BitMaskSize)) {\r
         Status = CpuFeatureInOrder->InitializeFunc (ProcessorNumber, CpuInfo, CpuFeatureInOrder->ConfigData, TRUE);\r
         if (EFI_ERROR (Status)) {\r
           //\r
           // Clean the CpuFeatureInOrder->FeatureMask in setting PCD.\r
           //\r
-          SupportedMaskCleanBit (CpuFeaturesData->SettingPcd, CpuFeatureInOrder->FeatureMask);\r
+          SupportedMaskCleanBit (CpuFeaturesData->SettingPcd, CpuFeatureInOrder->FeatureMask, CpuFeaturesData->BitMaskSize);\r
           if (CpuFeatureInOrder->FeatureName != NULL) {\r
             DEBUG ((DEBUG_WARN, "Warning :: Failed to enable Feature: Name = %a.\n", CpuFeatureInOrder->FeatureName));\r
           } else {\r
             DEBUG ((DEBUG_WARN, "Warning :: Failed to enable Feature: Mask = "));\r
-            DumpCpuFeatureMask (CpuFeatureInOrder->FeatureMask);\r
+            DumpCpuFeatureMask (CpuFeatureInOrder->FeatureMask, CpuFeaturesData->BitMaskSize);\r
           }\r
         } else {\r
           Success = TRUE;\r
@@ -659,7 +711,7 @@ AnalysisProcessorFeatures (
             DEBUG ((DEBUG_WARN, "Warning :: Failed to disable Feature: Name = %a.\n", CpuFeatureInOrder->FeatureName));\r
           } else {\r
             DEBUG ((DEBUG_WARN, "Warning :: Failed to disable Feature: Mask = "));\r
-            DumpCpuFeatureMask (CpuFeatureInOrder->FeatureMask);\r
+            DumpCpuFeatureMask (CpuFeatureInOrder->FeatureMask, CpuFeaturesData->BitMaskSize);\r
           }\r
         } else {\r
           Success = TRUE;\r
@@ -667,31 +719,43 @@ AnalysisProcessorFeatures (
       }\r
 \r
       if (Success) {\r
-        //\r
-        // If feature has dependence with the next feature (ONLY care core/package dependency).\r
-        // and feature initialize succeed, add sync semaphere here.\r
-        //\r
-        if (NextCpuFeatureInOrder != NULL) {\r
+        NextEntry = Entry->ForwardLink;\r
+        if (!IsNull (&CpuInitOrder->OrderList, NextEntry)) {\r
+          NextCpuFeatureInOrder = CPU_FEATURE_ENTRY_FROM_LINK (NextEntry);\r
+\r
+          //\r
+          // If the feature has a dependence on the next feature (only core/package dependencies matter)\r
+          // and the feature initialized successfully, add a sync semaphore here.\r
+          //\r
           BeforeDep = DetectFeatureScope (CpuFeatureInOrder, TRUE, NextCpuFeatureInOrder->FeatureMask);\r
           AfterDep  = DetectFeatureScope (NextCpuFeatureInOrder, FALSE, CpuFeatureInOrder->FeatureMask);\r
+          //\r
+          // Check whether the next feature has an After type dependence on any earlier CPU\r
+          // feature that is not its immediate neighbor.\r
+          //\r
+          NoneNeibAfterDep = DetectNoneNeighborhoodFeatureScope(NextCpuFeatureInOrder, FALSE, &CpuInitOrder->OrderList);\r
         } else {\r
-          BeforeDep = DetectFeatureScope (CpuFeatureInOrder, TRUE, NULL);\r
-          AfterDep = NoneDepType;\r
+          BeforeDep        = NoneDepType;\r
+          AfterDep         = NoneDepType;\r
+          NoneNeibAfterDep = NoneDepType;\r
         }\r
         //\r
-        // Assume only one of the depend is valid.\r
+        // Check whether the current feature has a Before type dependence on any later CPU\r
+        // feature that is not its immediate neighbor.\r
+        //\r
+        NoneNeibBeforeDep = DetectNoneNeighborhoodFeatureScope(CpuFeatureInOrder, TRUE, &CpuInitOrder->OrderList);\r
+\r
+        //\r
+        // Get the biggest dependence and add semaphore for it.\r
+        // PackageDepType > CoreDepType > ThreadDepType > NoneDepType.\r
         //\r
-        ASSERT (!(BeforeDep > ThreadDepType && AfterDep > ThreadDepType));\r
+        BeforeDep = BiggestDep(BeforeDep, AfterDep, NoneNeibBeforeDep, NoneNeibAfterDep);\r
         if (BeforeDep > ThreadDepType) {\r
           CPU_REGISTER_TABLE_WRITE32 (ProcessorNumber, Semaphore, 0, BeforeDep);\r
         }\r
-        if (AfterDep > ThreadDepType) {\r
-          CPU_REGISTER_TABLE_WRITE32 (ProcessorNumber, Semaphore, 0, AfterDep);\r
-        }\r
       }\r
 \r
-      Entry     = Entry->ForwardLink;\r
-      NextEntry = Entry->ForwardLink;\r
+      Entry = Entry->ForwardLink;\r
     }\r
 \r
     //\r
@@ -699,7 +763,7 @@ AnalysisProcessorFeatures (
     // again during initialize the features.\r
     //\r
     DEBUG ((DEBUG_INFO, "Dump final value for PcdCpuFeaturesSetting:\n"));\r
-    DumpCpuFeatureMask (CpuFeaturesData->SettingPcd);\r
+    DumpCpuFeatureMask (CpuFeaturesData->SettingPcd, CpuFeaturesData->BitMaskSize);\r
 \r
     //\r
     // Dump the RegisterTable\r
@@ -749,6 +813,58 @@ LibWaitForSemaphore (
              ) != Value);\r
 }\r
 \r
+/**\r
+  Read or write one of the control registers CR0, CR2, CR3 or CR4.\r
+\r
+  @param[in]      CrIndex         Index of the CR to access; only 0, 2, 3 and 4 are handled.\r
+  @param[in]      Read            TRUE reads the CR into *CrValue; FALSE writes *CrValue to the CR.\r
+  @param[in,out]  CrValue         Output of the CR value on read; input of the new value on write.\r
+  @retval EFI_SUCCESS             The requested read or write was performed.\r
+  @retval EFI_UNSUPPORTED         CrIndex is not 0, 2, 3 or 4; no register was accessed.\r
+**/\r
+EFI_STATUS\r
+ReadWriteCr (\r
+  IN     UINT32       CrIndex,\r
+  IN     BOOLEAN      Read,\r
+  IN OUT UINTN        *CrValue\r
+  )\r
+{\r
+  switch (CrIndex) {\r
+  case 0:\r
+    if (Read) {\r
+      *CrValue = AsmReadCr0 ();\r
+    } else {\r
+      AsmWriteCr0 (*CrValue);\r
+    }\r
+    break;\r
+  case 2:\r
+    if (Read) {\r
+      *CrValue = AsmReadCr2 ();\r
+    } else {\r
+      AsmWriteCr2 (*CrValue);\r
+    }\r
+    break;\r
+  case 3:\r
+    if (Read) {\r
+      *CrValue = AsmReadCr3 ();\r
+    } else {\r
+      AsmWriteCr3 (*CrValue);\r
+    }\r
+    break;\r
+  case 4:\r
+    if (Read) {\r
+      *CrValue = AsmReadCr4 ();\r
+    } else {\r
+      AsmWriteCr4 (*CrValue);\r
+    }\r
+    break;\r
+  default:\r
+    return EFI_UNSUPPORTED;\r
+  }\r
+\r
+  return EFI_SUCCESS;\r
+}\r
+\r
 /**\r
   Initialize the CPU registers from a register table.\r
 \r
@@ -773,12 +889,13 @@ ProgramProcessorRegister (
   CPU_REGISTER_TABLE_ENTRY  *RegisterTableEntryHead;\r
   volatile UINT32           *SemaphorePtr;\r
   UINT32                    FirstThread;\r
-  UINT32                    PackageThreadsCount;\r
   UINT32                    CurrentThread;\r
+  UINT32                    CurrentCore;\r
   UINTN                     ProcessorIndex;\r
-  UINTN                     ThreadIndex;\r
-  UINTN                     ValidThreadCount;\r
-  UINT32                    *ValidCoreCountPerPackage;\r
+  UINT32                    *ThreadCountPerPackage;\r
+  UINT8                     *ThreadCountPerCore;\r
+  EFI_STATUS                Status;\r
+  UINT64                    CurrentValue;\r
 \r
   //\r
   // Traverse Register Table of this logical processor\r
@@ -789,21 +906,6 @@ ProgramProcessorRegister (
 \r
     RegisterTableEntry = &RegisterTableEntryHead[Index];\r
 \r
-    DEBUG_CODE_BEGIN ();\r
-      AcquireSpinLock (&CpuFlags->ConsoleLogLock);\r
-      ThreadIndex = ApLocation->Package * CpuStatus->MaxCoreCount * CpuStatus->MaxThreadCount +\r
-              ApLocation->Core * CpuStatus->MaxThreadCount +\r
-              ApLocation->Thread;\r
-      DEBUG ((\r
-        DEBUG_INFO,\r
-        "Processor = %lu, Entry Index %lu, Type = %s!\n",\r
-        (UINT64)ThreadIndex,\r
-        (UINT64)Index,\r
-        mRegisterTypeStr[MIN ((REGISTER_TYPE)RegisterTableEntry->RegisterType, InvalidReg)]\r
-        ));\r
-      ReleaseSpinLock (&CpuFlags->ConsoleLogLock);\r
-    DEBUG_CODE_END ();\r
-\r
     //\r
     // Check the type of specified register\r
     //\r
@@ -812,60 +914,51 @@ ProgramProcessorRegister (
     // The specified register is Control Register\r
     //\r
     case ControlRegister:\r
-      switch (RegisterTableEntry->Index) {\r
-      case 0:\r
-        Value = AsmReadCr0 ();\r
-        Value = (UINTN) BitFieldWrite64 (\r
-                          Value,\r
-                          RegisterTableEntry->ValidBitStart,\r
-                          RegisterTableEntry->ValidBitStart + RegisterTableEntry->ValidBitLength - 1,\r
-                          RegisterTableEntry->Value\r
-                          );\r
-        AsmWriteCr0 (Value);\r
-        break;\r
-      case 2:\r
-        Value = AsmReadCr2 ();\r
-        Value = (UINTN) BitFieldWrite64 (\r
-                          Value,\r
-                          RegisterTableEntry->ValidBitStart,\r
-                          RegisterTableEntry->ValidBitStart + RegisterTableEntry->ValidBitLength - 1,\r
-                          RegisterTableEntry->Value\r
-                          );\r
-        AsmWriteCr2 (Value);\r
-        break;\r
-      case 3:\r
-        Value = AsmReadCr3 ();\r
-        Value = (UINTN) BitFieldWrite64 (\r
-                          Value,\r
-                          RegisterTableEntry->ValidBitStart,\r
-                          RegisterTableEntry->ValidBitStart + RegisterTableEntry->ValidBitLength - 1,\r
-                          RegisterTableEntry->Value\r
-                          );\r
-        AsmWriteCr3 (Value);\r
-        break;\r
-      case 4:\r
-        Value = AsmReadCr4 ();\r
-        Value = (UINTN) BitFieldWrite64 (\r
-                          Value,\r
-                          RegisterTableEntry->ValidBitStart,\r
-                          RegisterTableEntry->ValidBitStart + RegisterTableEntry->ValidBitLength - 1,\r
-                          RegisterTableEntry->Value\r
-                          );\r
-        AsmWriteCr4 (Value);\r
-        break;\r
-      case 8:\r
-        //\r
-        //  Do we need to support CR8?\r
-        //\r
-        break;\r
-      default:\r
+      Status = ReadWriteCr (RegisterTableEntry->Index, TRUE, &Value);\r
+      if (EFI_ERROR (Status)) {\r
         break;\r
       }\r
+      if (RegisterTableEntry->TestThenWrite) {\r
+        CurrentValue = BitFieldRead64 (\r
+                         Value,\r
+                         RegisterTableEntry->ValidBitStart,\r
+                         RegisterTableEntry->ValidBitStart + RegisterTableEntry->ValidBitLength - 1\r
+                         );\r
+        if (CurrentValue == RegisterTableEntry->Value) {\r
+          break;\r
+        }\r
+      }\r
+      Value = (UINTN) BitFieldWrite64 (\r
+                        Value,\r
+                        RegisterTableEntry->ValidBitStart,\r
+                        RegisterTableEntry->ValidBitStart + RegisterTableEntry->ValidBitLength - 1,\r
+                        RegisterTableEntry->Value\r
+                        );\r
+      ReadWriteCr (RegisterTableEntry->Index, FALSE, &Value);\r
       break;\r
+\r
     //\r
     // The specified register is Model Specific Register\r
     //\r
     case Msr:\r
+      if (RegisterTableEntry->TestThenWrite) {\r
+        Value = (UINTN)AsmReadMsr64 (RegisterTableEntry->Index);\r
+        if (RegisterTableEntry->ValidBitLength >= 64) {\r
+          if (Value == RegisterTableEntry->Value) {\r
+            break;\r
+          }\r
+        } else {\r
+          CurrentValue = BitFieldRead64 (\r
+                           Value,\r
+                           RegisterTableEntry->ValidBitStart,\r
+                           RegisterTableEntry->ValidBitStart + RegisterTableEntry->ValidBitLength - 1\r
+                           );\r
+          if (CurrentValue == RegisterTableEntry->Value) {\r
+            break;\r
+          }\r
+        }\r
+      }\r
+\r
       if (RegisterTableEntry->ValidBitLength >= 64) {\r
         //\r
         // If length is not less than 64 bits, then directly write without reading\r
@@ -928,30 +1021,47 @@ ProgramProcessorRegister (
       //  V(0...n)       V(0...n)      ...           V(0...n)\r
       //  n * P(0)       n * P(1)      ...           n * P(n)\r
       //\r
-      SemaphorePtr = CpuFlags->SemaphoreCount;\r
       switch (RegisterTableEntry->Value) {\r
       case CoreDepType:\r
+        SemaphorePtr = CpuFlags->CoreSemaphoreCount;\r
+        ThreadCountPerCore = (UINT8 *)(UINTN)CpuStatus->ThreadCountPerCore;\r
+\r
+        CurrentCore = ApLocation->Package * CpuStatus->MaxCoreCount + ApLocation->Core;\r
         //\r
         // Get Offset info for the first thread in the core which current thread belongs to.\r
         //\r
-        FirstThread = (ApLocation->Package * CpuStatus->MaxCoreCount + ApLocation->Core) * CpuStatus->MaxThreadCount;\r
+        FirstThread   = CurrentCore * CpuStatus->MaxThreadCount;\r
         CurrentThread = FirstThread + ApLocation->Thread;\r
+\r
+        //\r
+        // Different cores may have different valid threads in them. If the driver tracked the\r
+        // exact thread index in each core, the logic would be much more complicated.\r
+        // Here the driver simply records the max thread number in all cores and uses it as the\r
+        // expected thread number for all cores.\r
+        // In the two-step logic below, the current thread first releases the semaphore for each\r
+        // thread in the current core. Some threads may not be valid in this core, but the driver\r
+        // doesn't care. Second, the current thread waits on its semaphore once for each valid\r
+        // thread in the current core. Because only the valid threads release the semaphore for\r
+        // this thread, the driver only needs to wait for the valid thread count.\r
+        //\r
+\r
         //\r
-        // First Notify all threads in current Core that this thread has ready.\r
+        // First Notify ALL THREADs in current Core that this thread is ready.\r
         //\r
         for (ProcessorIndex = 0; ProcessorIndex < CpuStatus->MaxThreadCount; ProcessorIndex ++) {\r
-          LibReleaseSemaphore ((UINT32 *) &SemaphorePtr[FirstThread + ProcessorIndex]);\r
+          LibReleaseSemaphore (&SemaphorePtr[FirstThread + ProcessorIndex]);\r
         }\r
         //\r
-        // Second, check whether all valid threads in current core have ready.\r
+        // Second, check whether all VALID THREADs (not all threads) in current core are ready.\r
         //\r
-        for (ProcessorIndex = 0; ProcessorIndex < CpuStatus->MaxThreadCount; ProcessorIndex ++) {\r
+        for (ProcessorIndex = 0; ProcessorIndex < ThreadCountPerCore[CurrentCore]; ProcessorIndex ++) {\r
           LibWaitForSemaphore (&SemaphorePtr[CurrentThread]);\r
         }\r
         break;\r
 \r
       case PackageDepType:\r
-        ValidCoreCountPerPackage = (UINT32 *)(UINTN)CpuStatus->ValidCoreCountPerPackage;\r
+        SemaphorePtr = CpuFlags->PackageSemaphoreCount;\r
+        ThreadCountPerPackage = (UINT32 *)(UINTN)CpuStatus->ThreadCountPerPackage;\r
         //\r
         // Get Offset info for the first thread in the package which current thread belongs to.\r
         //\r
@@ -959,18 +1069,13 @@ ProgramProcessorRegister (
         //\r
         // Get the possible threads count for current package.\r
         //\r
-        PackageThreadsCount = CpuStatus->MaxThreadCount * CpuStatus->MaxCoreCount;\r
         CurrentThread = FirstThread + CpuStatus->MaxThreadCount * ApLocation->Core + ApLocation->Thread;\r
-        //\r
-        // Get the valid thread count for current package.\r
-        //\r
-        ValidThreadCount = CpuStatus->MaxThreadCount * ValidCoreCountPerPackage[ApLocation->Package];\r
 \r
         //\r
-        // Different packages may have different valid cores in them. If driver maintail clearly\r
-        // cores number in different packages, the logic will be much complicated.\r
-        // Here driver just simply records the max core number in all packages and use it as expect\r
-        // core number for all packages.\r
+        // Different packages may have different valid threads in them. If the driver tracked the\r
+        // exact thread index in each package, the logic would be much more complicated.\r
+        // Here the driver simply records the max thread number in all packages and uses it as the\r
+        // expected thread number for all packages.\r
         // In below two steps logic, first current thread will Release semaphore for each thread\r
         // in current package. Maybe some threads are not valid in this package, but driver don't\r
         // care. Second, driver will let current thread wait semaphore for all valid threads in\r
@@ -979,15 +1084,15 @@ ProgramProcessorRegister (
         //\r
 \r
         //\r
-        // First Notify ALL THREADS in current package that this thread has ready.\r
+        // First Notify ALL THREADS in current package that this thread is ready.\r
         //\r
-        for (ProcessorIndex = 0; ProcessorIndex < PackageThreadsCount ; ProcessorIndex ++) {\r
-          LibReleaseSemaphore ((UINT32 *) &SemaphorePtr[FirstThread + ProcessorIndex]);\r
+        for (ProcessorIndex = 0; ProcessorIndex < CpuStatus->MaxThreadCount * CpuStatus->MaxCoreCount; ProcessorIndex ++) {\r
+          LibReleaseSemaphore (&SemaphorePtr[FirstThread + ProcessorIndex]);\r
         }\r
         //\r
-        // Second, check whether VALID THREADS (not all threads) in current package have ready.\r
+        // Second, check whether VALID THREADS (not all threads) in current package are ready.\r
         //\r
-        for (ProcessorIndex = 0; ProcessorIndex < ValidThreadCount; ProcessorIndex ++) {\r
+        for (ProcessorIndex = 0; ProcessorIndex < ThreadCountPerPackage[ApLocation->Package]; ProcessorIndex ++) {\r
           LibWaitForSemaphore (&SemaphorePtr[CurrentThread]);\r
         }\r
         break;\r
@@ -1026,7 +1131,7 @@ SetProcessorRegister (
   CpuFeaturesData = (CPU_FEATURES_DATA *) Buffer;\r
   AcpiCpuData = CpuFeaturesData->AcpiCpuData;\r
 \r
-  RegisterTables = (CPU_REGISTER_TABLE *)(UINTN)AcpiCpuData->RegisterTable;\r
+  RegisterTables = (CPU_REGISTER_TABLE *)(UINTN)AcpiCpuData->CpuFeatureInitData.RegisterTable;\r
 \r
   InitApicId = GetInitialApicId ();\r
   RegisterTable = NULL;\r
@@ -1042,8 +1147,8 @@ SetProcessorRegister (
 \r
   ProgramProcessorRegister (\r
     RegisterTable,\r
-    (EFI_CPU_PHYSICAL_LOCATION *)(UINTN)AcpiCpuData->ApLocation + ProcIndex,\r
-    &AcpiCpuData->CpuStatus,\r
+    (EFI_CPU_PHYSICAL_LOCATION *)(UINTN)AcpiCpuData->CpuFeatureInitData.ApLocation + ProcIndex,\r
+    &AcpiCpuData->CpuFeatureInitData.CpuStatus,\r
     &CpuFeaturesData->CpuFlags\r
     );\r
 }\r
@@ -1062,26 +1167,24 @@ CpuFeaturesDetect (
   VOID\r
   )\r
 {\r
-  UINTN                  NumberOfCpus;\r
-  UINTN                  NumberOfEnabledProcessors;\r
   CPU_FEATURES_DATA      *CpuFeaturesData;\r
 \r
   CpuFeaturesData = GetCpuFeaturesData();\r
 \r
-  GetNumberOfProcessor (&NumberOfCpus, &NumberOfEnabledProcessors);\r
-\r
-  CpuInitDataInitialize (NumberOfCpus);\r
+  CpuInitDataInitialize ();\r
 \r
-  //\r
-  // Wakeup all APs for data collection.\r
-  //\r
-  StartupAPsWorker (CollectProcessorData, NULL);\r
+  if (CpuFeaturesData->NumberOfCpus > 1) {\r
+    //\r
+    // Wakeup all APs for data collection.\r
+    //\r
+    StartupAllAPsWorker (CollectProcessorData, NULL);\r
+  }\r
 \r
   //\r
   // Collect data on BSP\r
   //\r
   CollectProcessorData (CpuFeaturesData);\r
 \r
-  AnalysisProcessorFeatures (NumberOfCpus);\r
+  AnalysisProcessorFeatures (CpuFeaturesData->NumberOfCpus);\r
 }\r
 \r